From 34652d296cf604a141496cc8b4eaaee330c1e8a7 Mon Sep 17 00:00:00 2001 From: janmatzek Date: Mon, 13 Oct 2025 13:54:28 +0200 Subject: [PATCH 1/5] fix: change project settings to avoid mypy false import flagging --- pyproject.toml | 4 +++- tox.ini | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7be71c1..5dabbe7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,6 +3,7 @@ target-version = ['py311'] [tool.mypy] python_version = "3.11" +mypy_path = ["${MYPY_CONFIG_FILE_DIR}/scripts"] [[tool.mypy.overrides]] module = [ @@ -17,7 +18,8 @@ module = [ "requests.*", ] ignore_missing_imports = true +check_untyped_defs = true + [tool.ruff] -# Aligned with the default line length of Black line-length = 88 diff --git a/tox.ini b/tox.ini index bb43f0e..d3a7061 100644 --- a/tox.ini +++ b/tox.ini @@ -19,7 +19,9 @@ deps = -r requirements-dev.txt allowlist_externals = mypy -commands = mypy scripts tests --check-untyped-defs + sh +commands = + sh -c 'cd scripts && mypy --explicit-package-bases --namespace-packages . 
../tests --check-untyped-defs' [testenv:lint] description = Lint and format check the source code with black and ruff From dcbeac110a92012a0639d7fc8a1b2e3e664c4009 Mon Sep 17 00:00:00 2001 From: janmatzek Date: Mon, 13 Oct 2025 14:53:15 +0200 Subject: [PATCH 2/5] refactor: centralize argument parsing for top level scripts --- scripts/backup.py | 82 ++------- scripts/custom_fields.py | 64 +------ scripts/permission_mgmt.py | 61 +------ scripts/restore.py | 57 +----- scripts/user_data_filter_mgmt.py | 72 +------- scripts/user_group_mgmt.py | 100 +--------- scripts/user_mgmt.py | 88 +-------- scripts/utils/args/__init__.py | 0 scripts/utils/args/parser.py | 304 +++++++++++++++++++++++++++++++ scripts/utils/args/schemas.py | 147 +++++++++++++++ scripts/utils/args/validators.py | 33 ++++ scripts/utils/utils.py | 22 ++- scripts/workspace_mgmt.py | 78 +------- 13 files changed, 561 insertions(+), 547 deletions(-) create mode 100644 scripts/utils/args/__init__.py create mode 100644 scripts/utils/args/parser.py create mode 100644 scripts/utils/args/schemas.py create mode 100644 scripts/utils/args/validators.py diff --git a/scripts/backup.py b/scripts/backup.py index 371e566..90f6ee6 100644 --- a/scripts/backup.py +++ b/scripts/backup.py @@ -1,6 +1,5 @@ # (C) 2025 GoodData Corporation import abc -import argparse import json import logging import os @@ -16,18 +15,19 @@ import requests import yaml from gooddata_sdk.sdk import GoodDataSdk -from utils.backup_utils.input_loader import InputLoader # type: ignore[import] -from utils.constants import ( # type: ignore[import] +from utils.args.parser import Parser +from utils.args.schemas import BackupArgs +from utils.backup_utils.input_loader import InputLoader +from utils.constants import ( BackupSettings, DirNames, - GoodDataProfile, ) -from utils.gd_api import ( # type: ignore[import] +from utils.gd_api import ( GDApi, GoodDataRestApiError, ) -from utils.logger import setup_logging # type: ignore[import] -from utils.models.batch 
import BackupBatch, Size # type: ignore[import] +from utils.logger import setup_logging +from utils.models.batch import BackupBatch, Size setup_logging() module_name = __file__.split(os.sep)[-1] @@ -154,44 +154,6 @@ def create_api_client_from_profile(profile: str, profile_config: Path) -> GDApi: return GDApi(hostname, token) -def create_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser() - - parser.add_argument( - "ws_csv", - help="Path to csv with IDs of GD workspaces to backup.", - type=Path, - nargs="?", - ) - parser.add_argument( - "conf", help="Path to backup storage configuration file.", type=Path - ) - parser.add_argument( - "-p", - "--profile-config", - type=Path, - default=GoodDataProfile.PROFILE_PATH, - help="Optional path to GoodData profile config. " - f'If no path is provided, "{GoodDataProfile.PROFILE_PATH}" is used.', - ) - parser.add_argument( - "--profile", - type=str, - default="default", - help='GoodData profile to use. If not profile is provided, "default" is used.', - ) - parser.add_argument( - "-t", - "--input-type", - type=str, - choices=["list-of-workspaces", "list-of-parents", "entire-organization"], - default="list-of-workspaces", - help="Type of input to use as the base of the backup. 
If not provided, `list-of-workspaces` is used as default.", - ) - - return parser - - def write_to_yaml(folder, source): """Writes the source to a YAML file.""" with open(folder, "w") as outfile: @@ -363,7 +325,7 @@ def archive_gooddata_layouts_to_zip(folder: str) -> None: shutil.rmtree(target_subdir) -def create_client(args: argparse.Namespace) -> tuple[GoodDataSdk, GDApi]: +def create_client(args: BackupArgs) -> tuple[GoodDataSdk, GDApi]: """Creates a GoodData client.""" gdc_auth_token = os.environ.get("GDC_AUTH_TOKEN") gdc_hostname = os.environ.get("GDC_HOSTNAME") @@ -387,24 +349,6 @@ def create_client(args: argparse.Namespace) -> tuple[GoodDataSdk, GDApi]: ) -def validate_args(args: argparse.Namespace) -> None: - """Validates the arguments provided.""" - if args.input_type != "entire-organization": - if not args.ws_csv: - raise RuntimeError("Path to csv with workspace IDs is required.") - if not os.path.exists(args.ws_csv): - raise RuntimeError("Invalid path to csv given.") - - if not os.path.exists(args.conf): - raise RuntimeError("Invalid path to backup storage configuration given.") - - if args.input_type == "entire-organization" and args.ws_csv: - logger.warning( - "Input type is set to 'entire-organization', but a CSV file is provided. " - "The CSV file will be ignored." 
- ) - - def split_to_batches( workspaces_to_export: list[str], batch_size: Size ) -> list[BackupBatch]: @@ -509,20 +453,20 @@ def process_batches_in_parallel( raise -def main(args: argparse.Namespace) -> None: +def main(args: BackupArgs) -> None: """Main function for the backup script.""" sdk, api = create_client(args) org_id: str = sdk.catalog_organization.organization_id - conf: BackupRestoreConfig = BackupRestoreConfig(args.conf) + conf: BackupRestoreConfig = BackupRestoreConfig(str(args.conf)) storage_class: Type[BackupStorage] = get_storage(conf.storage_type) storage: BackupStorage = storage_class(conf) loader = InputLoader(api, conf.api_page_size) workspaces_to_export: list[str] = loader.get_ids_to_backup( - args.input_type, args.ws_csv + args.input_type, str(args.ws_csv) ) batches = split_to_batches(workspaces_to_export, conf.batch_size) @@ -535,11 +479,9 @@ def main(args: argparse.Namespace) -> None: def backup(): - parser: argparse.ArgumentParser = create_parser() - args: argparse.Namespace = parser.parse_args() + args: BackupArgs = Parser.parse_backup_args() try: - validate_args(args) main(args) logger.info("Backup completed!") diff --git a/scripts/custom_fields.py b/scripts/custom_fields.py index fbeae25..cec603b 100644 --- a/scripts/custom_fields.py +++ b/scripts/custom_fields.py @@ -5,17 +5,15 @@ Documentation and usage instructions are located in `docs/CUSTOM_FIELDS.md` file. 
""" -import argparse -from pathlib import Path - from gooddata_pipelines import ( CustomDatasetDefinition, CustomFieldDefinition, LdmExtensionManager, ) -from gooddata_sdk.utils import PROFILES_FILE_PATH -from utils.logger import get_logger, setup_logging # type: ignore[import] -from utils.utils import ( # type: ignore[import] +from utils.args.parser import Parser +from utils.args.schemas import CustomFieldsArgs +from utils.logger import get_logger, setup_logging +from utils.utils import ( create_client, read_csv_file_to_dict, ) @@ -27,14 +25,11 @@ def custom_fields() -> None: """Main function to run the custom fields script.""" - args: argparse.Namespace = parse_args() - path_to_custom_datasets_csv = args.path_to_custom_datasets_csv - path_to_custom_fields_csv = args.path_to_custom_fields_csv - check_relations: bool = args.check_relations + args: CustomFieldsArgs = Parser.parse_custom_fields_args() # Load input data from csv files raw_custom_datasets: list[dict[str, str]] = read_csv_file_to_dict( - path_to_custom_datasets_csv + args.path_to_custom_datasets_csv, args.delimiter, args.quotechar ) custom_datasets = [ @@ -43,7 +38,7 @@ def custom_fields() -> None: ] raw_custom_fields: list[dict[str, str]] = read_csv_file_to_dict( - path_to_custom_fields_csv + args.path_to_custom_fields_csv, args.delimiter, args.quotechar ) custom_fields = [ @@ -58,50 +53,7 @@ def custom_fields() -> None: manager.logger.subscribe(logger) # Process the custom datasets and fields - manager.process(custom_datasets, custom_fields, check_relations) - - -def parse_args(): - """Parse command line arguments.""" - parser = argparse.ArgumentParser(description="Custom Fields Script") - parser.add_argument( - "path_to_custom_datasets_csv", - type=str, - help="Path to the CSV file containing custom datasets definitions.", - ) - - parser.add_argument( - "path_to_custom_fields_csv", - type=str, - help="Path to the CSV file containing custom fields definitions.", - ) - - parser.add_argument( - 
"--no-relations-check", - action="store_false", - dest="check_relations", - help="Check relations after updating LLM. " - + "If new ivalid relations are found, the update is rolled back. " - + "Boolean, defaults to True.", - ) - - parser.add_argument( - "-p", - "--profile-config", - type=Path, - default=PROFILES_FILE_PATH, - help="Optional path to GoodData profile config. " - f'If no path is provided, "{PROFILES_FILE_PATH}" is used.', - ) - - parser.add_argument( - "--profile", - type=str, - default="default", - help='GoodData profile to use. If no profile is provided, "default" is used.', - ) - - return parser.parse_args() + manager.process(custom_datasets, custom_fields, args.check_relations) if __name__ == "__main__": diff --git a/scripts/permission_mgmt.py b/scripts/permission_mgmt.py index f2261b5..8fc503a 100644 --- a/scripts/permission_mgmt.py +++ b/scripts/permission_mgmt.py @@ -1,16 +1,14 @@ # (C) 2025 GoodData Corporation -import argparse -import os -from pathlib import Path from gooddata_pipelines import ( EntityType, PermissionIncrementalLoad, PermissionProvisioner, ) -from gooddata_sdk.utils import PROFILES_FILE_PATH -from utils.logger import get_logger, setup_logging # type: ignore[import] -from utils.utils import ( # type: ignore[import] +from utils.args.parser import Parser +from utils.args.schemas import PermissionArgs +from utils.logger import get_logger, setup_logging +from utils.utils import ( create_client, read_csv_file_to_dict, ) @@ -19,46 +17,14 @@ logger = get_logger(__name__) -def create_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(description="Management of workspace permissions.") - parser.add_argument( - "perm_csv", - type=Path, - help=( - "Path to (comma-delimited) csv with user/userGroup " - "to workspace permission pairs." 
- ), - ) - parser.add_argument( - "-d", - "--delimiter", - type=str, - default=",", - help="Delimiter used to separate different columns in the user_csv.", - ) - parser.add_argument( - "-p", - "--profile-config", - type=Path, - default=PROFILES_FILE_PATH, - help="Optional path to GoodData profile config. " - f'If no path is provided, "{PROFILES_FILE_PATH}" is used.', - ) - parser.add_argument( - "--profile", - type=str, - default="default", - help='GoodData profile to use. If not profile is provided, "default" is used.', - ) - return parser - - def read_permissions_from_csv( - args: argparse.Namespace, + args: PermissionArgs, ) -> list[PermissionIncrementalLoad]: """Reads permissions from the input csv file.""" validated_permissions: list[PermissionIncrementalLoad] = [] - raw_permissions = read_csv_file_to_dict(args.perm_csv, args.delimiter) + raw_permissions = read_csv_file_to_dict( + args.perm_csv, args.delimiter, args.quotechar + ) for raw_permission in raw_permissions: try: @@ -100,17 +66,8 @@ def read_permissions_from_csv( return validated_permissions -def validate_args(args: argparse.Namespace) -> None: - """Validates the input arguments.""" - if not os.path.exists(args.perm_csv): - raise RuntimeError( - "Invalid path to workspace permission management input csv given." 
- ) - - def permission_mgmt(): - parser = create_parser() - args = parser.parse_args() + args = Parser.parse_permission_args() permissions = read_permissions_from_csv(args) diff --git a/scripts/restore.py b/scripts/restore.py index 83d5f92..8448662 100644 --- a/scripts/restore.py +++ b/scripts/restore.py @@ -1,6 +1,5 @@ # (C) 2025 GoodData Corporation import abc -import argparse import csv import json import logging @@ -27,8 +26,10 @@ CatalogDeclarativeFilterView, ) from gooddata_sdk.sdk import GoodDataSdk -from utils.constants import DirNames, GoodDataProfile # type: ignore[import] -from utils.logger import setup_logging # type: ignore[import] +from utils.args.parser import Parser +from utils.args.schemas import RestoreArgs +from utils.constants import DirNames +from utils.logger import setup_logging BEARER_TKN_PREFIX = "Bearer" @@ -224,34 +225,6 @@ def _resolve_return_code( ) -def create_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser() - parser.add_argument( - "ws_csv", type=Path, help="Path to csv with IDs of GD workspaces to restore." - ) - parser.add_argument( - "conf", type=Path, help="Path to backup storage configuration file." - ) - parser.add_argument( - "-p", - "--profile-config", - type=Path, - default=GoodDataProfile.PROFILE_PATH, - help="Optional path to GoodData profile config. " - f'If no path is provided, "{GoodDataProfile.PROFILE_PATH}" is used.', - ) - parser.add_argument( - "--profile", - type=str, - default="default", - help='GoodData profile to use. If not profile is provided, "default" is used.', - ) - parser.add_argument( - "-v", "--verbose", action="store_true", help="Turns on the debug log output." 
- ) - return parser - - def read_targets_from_csv(csv_path: str) -> dict[str, str]: """Reads the csv file with workspace IDs and paths to backups.""" # TODO - handling of csv files with and without headers @@ -567,7 +540,7 @@ def create_api_client_from_profile(profile: str, profile_config: Path) -> GDApi: return GDApi(hostname, token) -def create_client(args: argparse.Namespace) -> tuple[GoodDataSdk, GDApi]: +def create_client(args: RestoreArgs) -> tuple[GoodDataSdk, GDApi]: """Creates GoodData SDK and API clients.""" gdc_auth_token = os.environ.get("GDC_AUTH_TOKEN") gdc_hostname = os.environ.get("GDC_HOSTNAME") @@ -591,33 +564,19 @@ def create_client(args: argparse.Namespace) -> tuple[GoodDataSdk, GDApi]: ) -def validate_args(args: argparse.Namespace) -> None: - """Validates the arguments provided.""" - if not os.path.exists(args.ws_csv): - raise RuntimeError("Invalid path to csv given.") - - if not os.path.exists(args.conf): - raise RuntimeError("Invalid path to backup storage configuration given.") - - def restore(): """Main entry point of the script.""" - parser = create_parser() - args = parser.parse_args() - validate_args(args) - - if args.verbose: - logger.setLevel(logging.DEBUG) + args: RestoreArgs = Parser.parse_restore_args() sdk, api = create_client(args) - conf = BackupRestoreConfig(args.conf) + conf = BackupRestoreConfig(str(args.conf)) cls_storage: type[BackupStorage] = get_storage(conf.storage_type) storage = cls_storage(conf) - ws_paths = read_targets_from_csv(args.ws_csv) + ws_paths = read_targets_from_csv(str(args.ws_csv)) validate_targets(sdk, ws_paths) restore_worker = RestoreWorker(sdk, api, storage, ws_paths) diff --git a/scripts/user_data_filter_mgmt.py b/scripts/user_data_filter_mgmt.py index f06c80c..5b0f9a8 100644 --- a/scripts/user_data_filter_mgmt.py +++ b/scripts/user_data_filter_mgmt.py @@ -1,13 +1,11 @@ # (C) 2025 GoodData Corporation -import argparse -import os -from pathlib import Path + from typing import Any from 
gooddata_pipelines import UserDataFilterFullLoad, UserDataFilterProvisioner -from gooddata_sdk.utils import PROFILES_FILE_PATH -from utils.logger import get_logger, setup_logging # type: ignore[import] -from utils.utils import ( # type: ignore[import] +from utils.args.parser import Parser +from utils.logger import get_logger, setup_logging +from utils.utils import ( create_client, read_csv_file_to_dict, ) @@ -17,63 +15,6 @@ logger = get_logger(__name__) -def create_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(description="Management of workspaces.") - parser.add_argument( - "filepath", - type=Path, - help="Path to csv with input data.", - ) - parser.add_argument( - "ldm_column_name", - type=str, - help="LDM column name.", - ) - parser.add_argument( - "maql_column_name", - type=str, - help="MAQL column name: {attribute/dataset.field}", - ) - parser.add_argument( - "-d", - "--delimiter", - type=str, - default=",", - help="Delimiter used to separate different columns in the workspace_csv.", - ) - parser.add_argument( - "-q", - "--quotechar", - type=str, - default='"', - help=( - "Character used for quoting (escaping) values " - "which contain delimiters or quotechars." - ), - ) - parser.add_argument( - "-p", - "--profile-config", - type=Path, - default=PROFILES_FILE_PATH, - help="Optional path to GoodData profile config. " - f'If no path is provided, "{PROFILES_FILE_PATH}" is used.', - ) - parser.add_argument( - "--profile", - type=str, - default="default", - help='GoodData profile to use. 
If no profile is provided, "default" is used.', - ) - return parser - - -def validate_args(args: argparse.Namespace) -> None: - """Validates the input arguments.""" - if not os.path.exists(args.filepath): - raise RuntimeError("Invalid path to input csv given.") - - def validate_user_data_filter_data( raw_user_data_filters: list[dict[str, Any]], ) -> list[UserDataFilterFullLoad]: @@ -95,10 +36,7 @@ def udf_mgmt(): """Main function for workspace management.""" # Create parser and parse arguments - parser = create_parser() - args = parser.parse_args() - - validate_args(args) + args = Parser.parse_user_data_filter_args() # Read CSV input raw_user_data_filters = read_csv_file_to_dict( diff --git a/scripts/user_group_mgmt.py b/scripts/user_group_mgmt.py index f26810d..2792179 100644 --- a/scripts/user_group_mgmt.py +++ b/scripts/user_group_mgmt.py @@ -10,101 +10,24 @@ # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-import argparse -import os -import re -from pathlib import Path + +from typing import Any from gooddata_pipelines import ( UserGroupIncrementalLoad, UserGroupProvisioner, ) -from gooddata_sdk.utils import PROFILES_FILE_PATH -from utils.logger import get_logger, setup_logging # type: ignore[import] -from utils.utils import ( # type: ignore[import] - create_client, - read_csv_file_to_dict, -) +from utils.args.parser import Parser +from utils.args.schemas import UserGroupArgs +from utils.logger import get_logger, setup_logging +from utils.utils import create_client, read_csv_file_to_dict setup_logging() logger = get_logger(__name__) -UG_REGEX = r"^(?!\.)[.A-Za-z0-9_-]{1,255}$" - - -def create_parser() -> argparse.ArgumentParser: - """Creates an argument parser.""" - parser = argparse.ArgumentParser(description="Management of users and userGroups.") - parser.add_argument( - "user_group_csv", type=Path, help="Path to csv with user groups definition." - ) - parser.add_argument( - "-d", - "--delimiter", - type=str, - default=",", - help="Delimiter used to separate different columns in the user_group_csv.", - ) - parser.add_argument( - "-u", - "--ug_delimiter", - type=str, - default="|", - help=( - "Delimiter used to separate different parent user groups within " - "the parent user group column in the user_group_csv. " - 'This must differ from the "delimiter" argument.' - ), - ) - parser.add_argument( - "-q", - "--quotechar", - type=str, - default='"', - help=( - "Character used for quoting (escaping) values " - "which contain delimiters or quotechars." - ), - ) - parser.add_argument( - "-p", - "--profile-config", - type=Path, - default=PROFILES_FILE_PATH, - help="Optional path to GoodData profile config. " - f'If no path is provided, "{PROFILES_FILE_PATH}" is used.', - ) - parser.add_argument( - "--profile", - type=str, - default="default", - help='GoodData profile to use. 
If no profile is provided, "default" is used.', - ) - return parser - - -def validate_args(args: argparse.Namespace) -> None: - """Validates the arguments provided.""" - if not os.path.exists(args.user_group_csv): - raise RuntimeError("Invalid path to user management input csv given.") - - if args.delimiter == args.ug_delimiter: - raise RuntimeError( - "Delimiter and ParentUserGroups Delimiter cannot be the same." - ) - - if args.ug_delimiter == "." or re.match(UG_REGEX, args.ug_delimiter): - raise RuntimeError( - 'ParentUserGroups delimiter cannot be dot (".") ' - f'or match the following regex: "{UG_REGEX}".' - ) - - if len(args.quotechar) != 1: - raise RuntimeError("The quotechar argument must be exactly one character long.") - def read_users_groups_from_csv( - args: argparse.Namespace, + args: UserGroupArgs, ) -> list[UserGroupIncrementalLoad]: """Reads users from csv file.""" user_groups: list[UserGroupIncrementalLoad] = [] @@ -112,12 +35,12 @@ def read_users_groups_from_csv( args.user_group_csv, args.delimiter, args.quotechar ) for raw_user_group in raw_user_groups: - processed_user_group = dict(raw_user_group) + processed_user_group: dict[str, Any] = dict(raw_user_group) parent_user_groups = raw_user_group["parent_user_groups"] if parent_user_groups: processed_user_group["parent_user_groups"] = parent_user_groups.split( - args.ug_delimiter + args.inner_delimiter ) else: processed_user_group["parent_user_groups"] = [] @@ -136,12 +59,9 @@ def read_users_groups_from_csv( def user_group_mgmt(): """Main function for user management.""" - parser = create_parser() - args = parser.parse_args() + args = Parser.parse_user_group_args() try: - validate_args(args) - provisioner = create_client( UserGroupProvisioner, args.profile_config, args.profile ) diff --git a/scripts/user_mgmt.py b/scripts/user_mgmt.py index 06da07d..620d368 100644 --- a/scripts/user_mgmt.py +++ b/scripts/user_mgmt.py @@ -1,73 +1,19 @@ # (C) 2025 GoodData Corporation -import argparse + import csv 
-import os -import re from pathlib import Path from gooddata_pipelines import UserIncrementalLoad, UserProvisioner -from gooddata_sdk.utils import PROFILES_FILE_PATH -from utils.logger import get_logger, setup_logging # type: ignore[import] -from utils.utils import create_client # type: ignore[import] - -UG_REGEX = r"^(?!\.)[.A-Za-z0-9_-]{1,255}$" +from utils.args.parser import Parser +from utils.logger import get_logger, setup_logging +from utils.utils import create_client setup_logging() logger = get_logger(__name__) -def create_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(description="Management of users and userGroups.") - parser.add_argument( - "user_csv", type=Path, help="Path to csv with user definitions." - ) - parser.add_argument( - "-d", - "--delimiter", - type=str, - default=",", - help="Delimiter used to separate different columns in the user_csv.", - ) - parser.add_argument( - "-u", - "--ug_delimiter", - type=str, - default="|", - help=( - "Delimiter used to separate different user groups within " - "the relevant user groups column in the user_csv. " - 'This must differ from the "delimiter" argument.' - ), - ) - parser.add_argument( - "-q", - "--quotechar", - type=str, - default='"', - help=( - "Character used for quoting (escaping) values " - "which contain delimiters or quotechars." - ), - ) - parser.add_argument( - "-p", - "--profile-config", - type=Path, - default=PROFILES_FILE_PATH, - help="Optional path to GoodData profile config. " - f'If no path is provided, "{PROFILES_FILE_PATH}" is used.', - ) - parser.add_argument( - "--profile", - type=str, - default="default", - help='GoodData profile to use. 
If no profile is provided, "default" is used.', - ) - return parser - - def read_users_from_csv( - path_to_csv: str, row_delimiter: str, quotechar: str, user_group_delimiter: str + path_to_csv: Path, row_delimiter: str, quotechar: str, user_group_delimiter: str ) -> list[UserIncrementalLoad]: """Reads users from csv file.""" @@ -106,33 +52,13 @@ def read_users_from_csv( return users -def validate_args(args: argparse.Namespace) -> None: - """Validates the arguments provided.""" - if not os.path.exists(args.user_csv): - raise RuntimeError("Invalid path to user management input csv given.") - - if args.delimiter == args.ug_delimiter: - raise RuntimeError("Delimiter and UserGroups Delimiter cannot be the same.") - - if args.ug_delimiter == "." or re.match(UG_REGEX, args.ug_delimiter): - raise RuntimeError( - 'Usergroup delimiter cannot be dot (".") ' - f'or match the following regex: "{UG_REGEX}".' - ) - - if len(args.quotechar) != 1: - raise RuntimeError("The quotechar argument must be exactly one character long.") - - def user_mgmt() -> None: """Main function for user management.""" - parser = create_parser() - args = parser.parse_args() - validate_args(args) + args = Parser.parse_user_args() users = read_users_from_csv( - args.user_csv, args.delimiter, args.quotechar, args.ug_delimiter + args.user_csv, args.delimiter, args.quotechar, args.inner_delimiter ) provisioner = create_client(UserProvisioner, args.profile_config, args.profile) diff --git a/scripts/utils/args/__init__.py b/scripts/utils/args/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/utils/args/parser.py b/scripts/utils/args/parser.py new file mode 100644 index 0000000..b3efece --- /dev/null +++ b/scripts/utils/args/parser.py @@ -0,0 +1,304 @@ +import argparse +from pathlib import Path + +from gooddata_sdk.utils import PROFILES_FILE_PATH +from utils.args.schemas import ( + BackupArgs, + CustomFieldsArgs, + PermissionArgs, + RestoreArgs, + UserArgs, + UserDataFilterArgs, + 
UserGroupArgs, + WorkspaceArgs, +) + + +class Parser: + """Interface to handle common command line arguments. + + Use `parse_*` class methods to parse command line arguments for each script. + """ + + parser: argparse.ArgumentParser + + DEFAULT_DELIMITER = "," + DEFAULT_INNER_DELIMITER = "|" + DEFAULT_QUOTECHAR = '"' + + def __init__(self, description: str) -> None: + self.parser = argparse.ArgumentParser(description=description) + + @classmethod + def parse_workspace_args(cls) -> WorkspaceArgs: + """Parses workspace management command line arguments.""" + parser = cls("Management of workspaces.") + + parser._add_file_path("filepath", "Path to CSV file with input data.") + parser._add_common_args() + + namespace = parser.parser.parse_args() + + return WorkspaceArgs( + filepath=namespace.filepath, + delimiter=namespace.delimiter, + inner_delimiter=namespace.inner_delimiter, + quotechar=namespace.quotechar, + profile_config=namespace.profile_config, + profile=namespace.profile, + ) + + @classmethod + def parse_user_args(cls) -> UserArgs: + """Parses user management command line arguments.""" + parser = cls("Management of users and userGroups.") + + parser._add_file_path("user_csv", "Path to csv with user definitions.") + parser._add_common_args() + + namespace = parser.parser.parse_args() + + return UserArgs( + user_csv=namespace.user_csv, + delimiter=namespace.delimiter, + inner_delimiter=namespace.inner_delimiter, + quotechar=namespace.quotechar, + profile_config=namespace.profile_config, + profile=namespace.profile, + ) + + @classmethod + def parse_user_group_args(cls) -> UserGroupArgs: + """Parses user group management command line arguments.""" + parser = cls("Management of users and userGroups.") + + parser._add_file_path( + "user_group_csv", "Path to csv with user groups definitions." 
+ ) + parser._add_common_args() + + namespace = parser.parser.parse_args() + + return UserGroupArgs( + user_group_csv=namespace.user_group_csv, + delimiter=namespace.delimiter, + inner_delimiter=namespace.inner_delimiter, + quotechar=namespace.quotechar, + profile_config=namespace.profile_config, + profile=namespace.profile, + ) + + @classmethod + def parse_permission_args(cls) -> PermissionArgs: + """Parses permission management command line arguments.""" + parser = cls("Management of workspace permissions.") + parser._add_file_path("perm_csv", "Path to csv with permission definitions.") + + parser._add_csv_args() + parser._add_profile_args() + + namespace = parser.parser.parse_args() + + return PermissionArgs( + perm_csv=namespace.perm_csv, + delimiter=namespace.delimiter, + quotechar=namespace.quotechar, + profile_config=namespace.profile_config, + profile=namespace.profile, + ) + + @classmethod + def parse_user_data_filter_args(cls) -> UserDataFilterArgs: + """Parses user data filter management command line arguments.""" + parser = cls("Management of user data filters.") + + parser._add_file_path("filepath", "Path to csv with input data.") + + parser._add_ldm_column_name() + parser._add_maql_column_name() + + parser._add_csv_args() + parser._add_profile_args() + + namespace = parser.parser.parse_args() + + return UserDataFilterArgs( + filepath=namespace.filepath, + ldm_column_name=namespace.ldm_column_name, + maql_column_name=namespace.maql_column_name, + delimiter=namespace.delimiter, + quotechar=namespace.quotechar, + profile_config=namespace.profile_config, + profile=namespace.profile, + ) + + @classmethod + def parse_custom_fields_args(cls) -> CustomFieldsArgs: + """Parses custom fields management command line arguments.""" + parser = cls("Management of custom fields.") + parser._add_file_path( + "path_to_custom_datasets_csv", + "Path to csv with custom datasets definitions.", + ) + parser._add_file_path( + "path_to_custom_fields_csv", "Path to csv with 
custom fields definitions." + ) + + parser._add_csv_args() + parser._add_profile_args() + + parser._add_check_relations() + + namespace = parser.parser.parse_args() + + return CustomFieldsArgs( + path_to_custom_datasets_csv=namespace.path_to_custom_datasets_csv, + path_to_custom_fields_csv=namespace.path_to_custom_fields_csv, + delimiter=namespace.delimiter, + quotechar=namespace.quotechar, + check_relations=namespace.check_relations, + profile_config=namespace.profile_config, + profile=namespace.profile, + ) + + @classmethod + def parse_backup_args(cls) -> BackupArgs: + """Parses backup management command line arguments.""" + parser = cls("Backup of workspaces.") + parser._add_file_path("ws_csv", "Path to csv with input data.") + parser._add_file_path("conf", "Path to backup storage configuration file.") + parser._add_profile_args() + parser._add_input_type() + + namespace = parser.parser.parse_args() + + return BackupArgs( + ws_csv=namespace.ws_csv, + conf=namespace.conf, + profile_config=namespace.profile_config, + profile=namespace.profile, + input_type=namespace.input_type, + ) + + @classmethod + def parse_restore_args(cls) -> RestoreArgs: + """Parses restore management command line arguments.""" + parser = cls("Restore of workspaces.") + parser._add_file_path("ws_csv", "Path to csv with input data.") + parser._add_file_path("conf", "Path to backup storage configuration file.") + parser._add_profile_args() + + namespace = parser.parser.parse_args() + + return RestoreArgs( + ws_csv=namespace.ws_csv, + conf=namespace.conf, + profile_config=namespace.profile_config, + profile=namespace.profile, + ) + + def _add_common_args(self) -> None: + self._add_csv_args() + self._add_inner_delimiter() + + self._add_profile_args() + + def _add_profile_args(self) -> None: + self._add_profile_config() + self._add_profile() + + def _add_csv_args(self) -> None: + self._add_quotechar() + self._add_delimiter() + + def _add_file_path(self, name: str, help: str) -> None: + 
self.parser.add_argument( + name, + type=Path, + help=help, + ) + + def _add_delimiter(self) -> None: + self.parser.add_argument( + "-d", + "--delimiter", + type=str, + default=self.DEFAULT_DELIMITER, + help="Delimiter used to separate different columns in the input csv.", + ) + + def _add_inner_delimiter(self) -> None: + self.parser.add_argument( + "-i", + "--inner-delimiter", + type=str, + default=self.DEFAULT_INNER_DELIMITER, + help=( + "Delimiter used to separate different inner values within " + "the columns in the input csv which contain inner-delimiter " + "separated values. This must differ from the 'delimiter' argument." + ), + ) + + def _add_quotechar(self) -> None: + self.parser.add_argument( + "-q", + "--quotechar", + type=str, + default=self.DEFAULT_QUOTECHAR, + help="Character used for quoting (escaping) values which contain " + "delimiters or quotechars.", + ) + + def _add_profile_config(self) -> None: + self.parser.add_argument( + "-p", + "--profile-config", + type=Path, + default=PROFILES_FILE_PATH, + help="Optional path to GoodData profile config. If no path is " + "provided, the default profiles file is used.", + ) + + def _add_profile(self) -> None: + self.parser.add_argument( + "--profile", + type=str, + default="default", + help="GoodData profile to use. If no profile is provided, 'default' " + "is used.", + ) + + def _add_check_relations(self) -> None: + self.parser.add_argument( + "--no-relations-check", + action="store_false", + dest="check_relations", + help="Check relations after updating LLM. " + + "If new invalid relations are found, the update is rolled back. 
" + + "Boolean, defaults to True.", + ) + + def _add_ldm_column_name(self) -> None: + self.parser.add_argument( + "ldm_column_name", + type=str, + help="LDM column name.", + ) + + def _add_maql_column_name(self) -> None: + self.parser.add_argument( + "maql_column_name", + type=str, + help="MAQL column name: {attribute/dataset.field}", + ) + + def _add_input_type(self) -> None: + self.parser.add_argument( + "-t", + "--input-type", + type=str, + choices=["list-of-workspaces", "list-of-parents", "entire-organization"], + default="list-of-workspaces", + help="Type of input to use as the base of the backup. If not provided, `list-of-workspaces` is used as default.", + ) diff --git a/scripts/utils/args/schemas.py b/scripts/utils/args/schemas.py new file mode 100644 index 0000000..d3a1a3f --- /dev/null +++ b/scripts/utils/args/schemas.py @@ -0,0 +1,147 @@ +# (C) 2025 GoodData Corporation + +from dataclasses import dataclass +from pathlib import Path + +from utils.args.validators import ( + delimiters_must_be_different, + inner_delimiter_must_be_valid, + path_must_exist, + quotechar_must_be_valid, +) + + +@dataclass +class UserArgs: + """Schema for user management command line arguments.""" + + user_csv: Path + delimiter: str + inner_delimiter: str + quotechar: str + profile_config: Path + profile: str + + def __post_init__(self) -> None: + path_must_exist(self.user_csv) + delimiters_must_be_different(self.delimiter, self.inner_delimiter) + inner_delimiter_must_be_valid(self.inner_delimiter) + quotechar_must_be_valid(self.quotechar) + + +@dataclass +class WorkspaceArgs: + """Schema for workspace management CLI arguments.""" + + filepath: Path + delimiter: str + inner_delimiter: str + quotechar: str + profile_config: Path + profile: str + + def __post_init__(self) -> None: + path_must_exist(self.filepath) + delimiters_must_be_different(self.delimiter, self.inner_delimiter) + inner_delimiter_must_be_valid(self.inner_delimiter) + quotechar_must_be_valid(self.quotechar) + + 
@dataclass
class UserGroupArgs:
    """Command line arguments accepted by the user group management script.

    The values are validated in ``__post_init__``, i.e. as soon as an
    instance is constructed. ``profile_config`` and ``profile`` are stored
    as given; this class does not check them.
    """

    user_group_csv: Path
    delimiter: str
    inner_delimiter: str
    quotechar: str
    profile_config: Path
    profile: str

    def __post_init__(self) -> None:
        """Check the input path, both delimiters and the quotechar."""
        # The input file is checked first, the csv options afterwards. The
        # order is kept stable because it decides which error surfaces first.
        path_must_exist(self.user_group_csv)

        outer, inner = self.delimiter, self.inner_delimiter
        delimiters_must_be_different(outer, inner)
        inner_delimiter_must_be_valid(inner)

        quotechar_must_be_valid(self.quotechar)


@dataclass
class PermissionArgs:
    """Command line arguments accepted by the permission management script.

    Only the csv path and the quotechar are validated; the remaining values
    are stored as given.
    """

    perm_csv: Path
    delimiter: str
    quotechar: str
    profile_config: Path
    profile: str

    def __post_init__(self) -> None:
        """Check the permission csv path and the quotechar."""
        path_must_exist(self.perm_csv)
        quotechar_must_be_valid(self.quotechar)


@dataclass
class UserDataFilterArgs:
    """Command line arguments accepted by the user data filter script.

    Only the csv path and the quotechar are validated; the column names and
    the remaining values are stored as given.
    """

    filepath: Path
    ldm_column_name: str
    maql_column_name: str
    delimiter: str
    quotechar: str
    profile_config: Path
    profile: str

    def __post_init__(self) -> None:
        """Check the input csv path and the quotechar."""
        path_must_exist(self.filepath)
        quotechar_must_be_valid(self.quotechar)


@dataclass
class CustomFieldsArgs:
    """Command line arguments accepted by the custom fields script.

    Both csv paths and the quotechar are validated; the remaining values
    are stored as given.
    """

    path_to_custom_datasets_csv: Path
    path_to_custom_fields_csv: Path
    delimiter: str
    quotechar: str
    check_relations: bool
    profile_config: Path
    profile: str

    def __post_init__(self) -> None:
        """Check both csv paths (datasets first) and the quotechar."""
        for csv_path in (
            self.path_to_custom_datasets_csv,
            self.path_to_custom_fields_csv,
        ):
            path_must_exist(csv_path)

        quotechar_must_be_valid(self.quotechar)


@dataclass
class BackupArgs:
    """Command line arguments accepted by the backup script.

    The storage configuration path is always validated. The workspace csv
    path is validated for every input type except ``entire-organization``.
    """

    ws_csv: Path
    conf: Path
    profile_config: Path
    profile: str
    input_type: str

    def __post_init__(self) -> None:
        """Check the configuration path and, when relevant, the csv path."""
        path_must_exist(self.conf)

        # The workspace csv is only checked when the input type is something
        # other than the entire organization.
        if self.input_type == "entire-organization":
            return

        path_must_exist(self.ws_csv)


+@dataclass +class RestoreArgs: + """Schema for restore management command line arguments.""" + + ws_csv: Path + conf: Path + profile_config: Path + profile: str + + def __post_init__(self) -> None: + path_must_exist(self.ws_csv) + path_must_exist(self.conf) diff --git a/scripts/utils/args/validators.py b/scripts/utils/args/validators.py new file mode 100644 index 0000000..fd48c5b --- /dev/null +++ b/scripts/utils/args/validators.py @@ -0,0 +1,33 @@ +# (C) 2025 GoodData Corporation + +import re +from pathlib import Path + +UG_REGEX = r"^(?!\.)[.A-Za-z0-9_-]{1,255}$" + + +def path_must_exist(value: Path) -> None: + """Validate that a provided filesystem path exists.""" + if not value.exists(): + raise ValueError(f"Invalid path to input csv given: {value}") + + +def delimiters_must_be_different(delimiter: str, inner_delimiter: str) -> None: + """Validate that a provided delimiter is different from the inner delimiter.""" + if delimiter == inner_delimiter: + raise ValueError("Delimiter and Inner Delimiter cannot be the same.") + + +def inner_delimiter_must_be_valid(inner_delimiter: str) -> None: + """Validate that a provided inner delimiter is valid.""" + if inner_delimiter == "." or re.match(UG_REGEX, inner_delimiter): + raise RuntimeError( + 'Inner delimiter cannot be dot (".") ' + f'or match the following regex: "{UG_REGEX}".' 
+ ) + + +def quotechar_must_be_valid(quotechar: str) -> None: + """Validate that a provided quotechar is valid.""" + if len(quotechar) != 1: + raise RuntimeError("The quotechar argument must be exactly one character long.") diff --git a/scripts/utils/utils.py b/scripts/utils/utils.py index 9614dee..53f2d1d 100644 --- a/scripts/utils/utils.py +++ b/scripts/utils/utils.py @@ -5,7 +5,7 @@ import logging import os from pathlib import Path -from typing import Protocol, Type +from typing import Any, Protocol, Type, TypeVar, cast logger = logging.getLogger(__name__) @@ -24,9 +24,12 @@ def create_from_profile( pass +T = TypeVar("T", bound="PipelinesClient") + + def read_csv_file_to_dict( - file_path: str, delimiter: str = ",", quotechar: str = '"' -) -> list[dict[str, str]]: + file_path: Path, delimiter: str = ",", quotechar: str = '"' +) -> list[dict[str, Any]]: """Read a CSV file and return its content as a list of dictionaries. Args: @@ -39,21 +42,22 @@ def read_csv_file_to_dict( return list(csv.DictReader(file, delimiter=delimiter, quotechar=quotechar)) -def create_client( - client_type: Type[PipelinesClient], profile_config: Path, profile: str -) -> PipelinesClient: +def create_client(client_type: Type[T], profile_config: Path, profile: str) -> T: """Creates GoodData Pipelines client of given type.""" gdc_auth_token = os.environ.get("GDC_AUTH_TOKEN") gdc_hostname = os.environ.get("GDC_HOSTNAME") if gdc_hostname and gdc_auth_token: logger.info("Using GDC_HOSTNAME and GDC_AUTH_TOKEN envvars.") - return client_type.create(host=gdc_hostname, token=gdc_auth_token) + return cast(T, client_type.create(host=gdc_hostname, token=gdc_auth_token)) if os.path.exists(profile_config): logger.info(f"Using GoodData profile {profile} sourced from {profile_config}.") - return client_type.create_from_profile( - profile=profile, profiles_path=profile_config + return cast( + T, + client_type.create_from_profile( + profile=profile, profiles_path=profile_config + ), ) raise RuntimeError( 
diff --git a/scripts/workspace_mgmt.py b/scripts/workspace_mgmt.py index b38ff44..33f9029 100644 --- a/scripts/workspace_mgmt.py +++ b/scripts/workspace_mgmt.py @@ -1,13 +1,11 @@ # (C) 2025 GoodData Corporation -import argparse -import os -from pathlib import Path + from typing import Any from gooddata_pipelines import WorkspaceIncrementalLoad, WorkspaceProvisioner -from gooddata_sdk.utils import PROFILES_FILE_PATH -from utils.logger import get_logger, setup_logging # type: ignore[import] -from utils.utils import ( # type: ignore[import] +from utils.args.parser import Parser +from utils.logger import get_logger, setup_logging +from utils.utils import ( create_client, read_csv_file_to_dict, ) @@ -17,69 +15,6 @@ logger = get_logger(__name__) -def create_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(description="Management of workspaces.") - parser.add_argument( - "filepath", - type=Path, - help="Path to CSV file with input data.", - ) - parser.add_argument( - "-d", - "--delimiter", - type=str, - default=",", - help="Delimiter used to separate different columns in the workspace_csv.", - ) - parser.add_argument( - "-i", - "--inner-delimiter", - type=str, - default="|", - help=( - "Delimiter used to separate different inner values within " - "the columns in the input csv which contain inner-delimiter separated values. " - 'This must differ from the "delimiter" argument.' - ), - ) - parser.add_argument( - "-q", - "--quotechar", - type=str, - default='"', - help=( - "Character used for quoting (escaping) values " - "which contain delimiters or quotechars." - ), - ) - parser.add_argument( - "-p", - "--profile-config", - type=Path, - default=PROFILES_FILE_PATH, - help="Optional path to GoodData profile config. " - f'If no path is provided, "{PROFILES_FILE_PATH}" is used.', - ) - parser.add_argument( - "--profile", - type=str, - default="default", - help='GoodData profile to use. 
If no profile is provided, "default" is used.', - ) - return parser - - -def validate_args(args: argparse.Namespace) -> None: - """Validates the input arguments.""" - if not os.path.exists(args.filepath): - raise RuntimeError("Invalid path to input csv given.") - - if args.delimiter == args.inner_delimiter: - raise RuntimeError( - "Delimiter and Workspace Data Filter Delimiter cannot be the same." - ) - - def validate_workspace_data( raw_workspaces: list[dict[str, Any]], wdf_delimiter: str, @@ -119,10 +54,7 @@ def workspace_mgmt(): """Main function for workspace management.""" # Create parser and parse arguments - parser = create_parser() - args = parser.parse_args() - - validate_args(args) + args = Parser.parse_workspace_args() # Read CSV input raw_workspaces = read_csv_file_to_dict( From 93a12a03c1ba43b021152de092de00f7e10b85f4 Mon Sep 17 00:00:00 2001 From: janmatzek Date: Mon, 13 Oct 2025 14:55:55 +0200 Subject: [PATCH 3/5] refactor(SVS-1232): reuse backup and restore from gooddata-pipelines --- requirements.txt | 8 +- scripts/backup.py | 496 +---------------- scripts/restore.py | 591 +-------------------- scripts/utils/backup_restore_config.py | 58 ++ scripts/utils/backup_utils/__init__.py | 0 scripts/utils/backup_utils/input_loader.py | 199 ------- scripts/utils/constants.py | 54 -- scripts/utils/gd_api.py | 91 ---- scripts/utils/models/__init__.py | 0 scripts/utils/models/batch.py | 15 - scripts/utils/models/workspace_response.py | 49 -- scripts/utils/utils.py | 115 +++- tests/__init__.py | 1 - tests/test_backup_restore_config.py | 31 ++ 14 files changed, 225 insertions(+), 1483 deletions(-) create mode 100644 scripts/utils/backup_restore_config.py delete mode 100644 scripts/utils/backup_utils/__init__.py delete mode 100644 scripts/utils/backup_utils/input_loader.py delete mode 100644 scripts/utils/constants.py delete mode 100644 scripts/utils/gd_api.py delete mode 100644 scripts/utils/models/__init__.py delete mode 100644 scripts/utils/models/batch.py 
delete mode 100644 scripts/utils/models/workspace_response.py delete mode 100644 tests/__init__.py create mode 100644 tests/test_backup_restore_config.py diff --git a/requirements.txt b/requirements.txt index 80ca7ef..9a4e938 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,5 @@ # GoodData Python SDK packages gooddata_sdk>=1.52 -gooddata-pipelines>=1.52 +gooddata-pipelines>=1.52.1.dev3 -# Other dependencies -# TODO: remove after full transition to GoodData SDK packages -requests -boto3 -pydantic \ No newline at end of file +pyyaml \ No newline at end of file diff --git a/scripts/backup.py b/scripts/backup.py index 90f6ee6..bb44735 100644 --- a/scripts/backup.py +++ b/scripts/backup.py @@ -1,492 +1,40 @@ # (C) 2025 GoodData Corporation -import abc -import json + import logging -import os -import shutil -import tempfile -import threading -import time -from concurrent.futures import ThreadPoolExecutor, as_completed -from pathlib import Path -from typing import Any, Type -import boto3 # type: ignore[import] -import requests -import yaml -from gooddata_sdk.sdk import GoodDataSdk +from gooddata_pipelines import BackupManager from utils.args.parser import Parser from utils.args.schemas import BackupArgs -from utils.backup_utils.input_loader import InputLoader -from utils.constants import ( - BackupSettings, - DirNames, -) -from utils.gd_api import ( - GDApi, - GoodDataRestApiError, -) +from utils.backup_restore_config import load_config_from_yaml from utils.logger import setup_logging -from utils.models.batch import BackupBatch, Size +from utils.utils import create_client setup_logging() -module_name = __file__.split(os.sep)[-1] -logger = logging.getLogger(module_name) - - -# TODO: consider moving storage related logic to a separate module and reuse it in restore -class BackupRestoreConfig: - def __init__(self, conf_path: str): - with open(conf_path, "r") as stream: - conf: dict = yaml.safe_load(stream) - - self.storage_type: str = conf["storage_type"] - 
self.storage: dict[str, str] = conf["storage"] - - page_size = conf.get("api_page_size", BackupSettings.DEFAULT_PAGE_SIZE) - self.api_page_size: Size = Size(size=page_size) - - batch_size = conf.get("batch_size", BackupSettings.DEFAULT_BATCH_SIZE) - self.batch_size: Size = Size(size=batch_size) - - -class BackupStorage(abc.ABC): - def __init__(self, conf: BackupRestoreConfig): - return - - @abc.abstractmethod - def export(self, folder, org_id): - """Exports the content of the folder to the storage.""" - raise NotImplementedError - - -class S3Storage(BackupStorage): - def __init__(self, conf: BackupRestoreConfig): - self._config = conf.storage - self._session = self._create_boto_session(self._config) - self._resource = self._session.resource("s3") - self._bucket = self._resource.Bucket(self._config["bucket"]) # type: ignore [missing library stubs] - suffix = "/" if not self._config["backup_path"].endswith("/") else "" - self._backup_path = self._config["backup_path"] + suffix - - self._verify_connection() - - @staticmethod - def _create_boto_session(config: dict[str, str]) -> boto3.Session: - if config.get("aws_access_key_id") and config.get("aws_secret_access_key"): - if not config.get("aws_default_region"): - logger.warning("No AWS region specified. Defaulting to us-east-1.") - try: - return boto3.Session( - aws_access_key_id=config["aws_access_key_id"], - aws_secret_access_key=config["aws_secret_access_key"], - region_name=config["aws_default_region"], - ) - except Exception: - logger.warning( - "Failed to create boto3 session with supplied credentials. Falling back to profile..." - ) - try: - return boto3.Session(profile_name=config.get("profile")) - except Exception: - logger.warning( - f'AWS profile "{config.get("profile")}" not found. Trying other fallback methods...' - ) - - return boto3.Session() - - def _verify_connection(self) -> None: - """ - Pings the S3 bucket to verify that the connection is working. 
- """ - try: - self._resource.meta.client.head_bucket(Bucket=self._config["bucket"]) - except Exception as e: - raise RuntimeError( - f"Failed to connect to S3 bucket {self._config['bucket']}: {e}" - ) - - def export(self, folder, org_id) -> None: - """Uploads the content of the folder to S3 as backup.""" - storage_path = self._config["bucket"] + "/" + self._backup_path - logger.info(f"Uploading {org_id} to {storage_path}") - folder = folder + "/" + org_id - for subdir, dirs, files in os.walk(folder): - full_path = os.path.join(subdir) - export_path = ( - self._backup_path + org_id + "/" + full_path[len(folder) + 1 :] + "/" - ) - self._bucket.put_object(Key=export_path) - - for file in files: - full_path = os.path.join(subdir, file) - with open(full_path, "rb") as data: - export_path = ( - self._backup_path + org_id + "/" + full_path[len(folder) + 1 :] - ) - self._bucket.put_object(Key=export_path, Body=data) - - -class LocalStorage(BackupStorage): - def __init__(self, conf: BackupRestoreConfig): - return - - def export(self, folder, org_id, export_folder="local_backups"): - """Copies the content of the folder to local storage as backup.""" - logger.info(f"Saving {org_id} to local storage") - shutil.copytree( - Path(folder), Path(Path.cwd(), export_folder), dirs_exist_ok=True - ) - - -def create_api_client_from_profile(profile: str, profile_config: Path) -> GDApi: - """Creates a GoodData API client from the specified profile.""" - with open(profile_config, "r") as file: - config = yaml.safe_load(file) - - if profile not in config: - raise RuntimeError( - f'Specified profile name "{profile}" not found in "{profile_config}".' 
- ) - - profile_conf = config[profile] - hostname, token = profile_conf["host"], profile_conf["token"] - return GDApi(hostname, token) - - -def write_to_yaml(folder, source): - """Writes the source to a YAML file.""" - with open(folder, "w") as outfile: - yaml.dump(source, outfile) - - -def get_storage(storage_type: str) -> Type[BackupStorage]: - """Returns the storage class based on the storage type.""" - match storage_type: - case "s3": - logger.info("Storage type set to S3.") - return S3Storage - case "local": - logger.info("Storage type set to local storage.") - return LocalStorage - case _: - raise RuntimeError(f'Unsupported storage type "{storage_type}".') - - -def get_user_data_filters(api: GDApi, ws_id: str) -> dict | None: - """Returns the user data filters for the specified workspace.""" - try: - user_data_filters = api.get(f"/layout/workspaces/{ws_id}/userDataFilters", None) - if user_data_filters: - return user_data_filters.json() - except GoodDataRestApiError as e: - logger.error(f"UDF call for {ws_id} returned error: {e}") - return None - - -def store_user_data_filters( - user_data_filters: dict, export_path: Path, org_id: str, ws_id: str -): - """Stores the user data filters in the specified export path.""" - os.mkdir( - os.path.join( - export_path, - "gooddata_layouts", - org_id, - "workspaces", - ws_id, - "user_data_filters", - ) - ) - - for filter in user_data_filters["userDataFilters"]: - udf_file_path = os.path.join( - export_path, - "gooddata_layouts", - org_id, - "workspaces", - ws_id, - "user_data_filters", - filter["id"] + ".yaml", - ) - write_to_yaml(udf_file_path, filter) - - -def move_folder(source: Path, destination: Path) -> None: - """Moves the source folder to the destination.""" - shutil.move(source, destination) - - -def get_automations_from_api(api: GDApi, ws_id: str) -> Any: - """Returns automations for the workspace as JSON.""" - response: requests.Response = requests.get( - 
f"{api.endpoint}/entities/workspaces/{ws_id}/automations?include=ALL", - headers={ - "Authorization": f"Bearer {api.api_token}", - "Content-Type": "application/vnd.gooddata.api+json", - }, - ) - content: Any = response.json() - - return content - - -def store_automations(api: GDApi, export_path: Path, org_id: str, ws_id: str) -> None: - """Stores the automations in the specified export path.""" - # Get the automations from the API - automations: Any = get_automations_from_api(api, ws_id) - - automations_folder_path: Path = Path( - export_path, "gooddata_layouts", org_id, "workspaces", ws_id, "automations" - ) - - automations_file_path: Path = Path(automations_folder_path, "automations.json") - - os.mkdir(automations_folder_path) - - # Store the automations in a JSON file - if len(automations["data"]) > 0: - with open(automations_file_path, "w") as f: - json.dump(automations, f) - +logger = logging.getLogger(__name__) -def store_declarative_filter_views( - sdk: GoodDataSdk, export_path: Path, org_id: str, ws_id: str -) -> None: - """Stores the filter views in the specified export path.""" - # Get the filter views YAML files from the API - sdk.catalog_workspace.store_declarative_filter_views(ws_id, export_path) - # Move filter views to the subfolder containing analytics model - move_folder( - Path(export_path, "gooddata_layouts", org_id, "filter_views"), - Path( - export_path, - "gooddata_layouts", - org_id, - "workspaces", - ws_id, - "filter_views", - ), - ) - - -def get_workspace_export( - sdk: GoodDataSdk, - api: GDApi, - local_target_path: str, - org_id: str, - workspaces_to_export: list[str], -) -> None: - """ - Iterate over all workspaces in the workspaces_to_export list and store - their declarative_workspace and their respective user data filters. 
- """ - exported = False - for ws_id in workspaces_to_export: - export_path = Path( - local_target_path, org_id, ws_id, BackupSettings.TIMESTAMP_SDK_FOLDER - ) - - user_data_filters = get_user_data_filters(api, ws_id) - if not user_data_filters: - logger.error(f"Skipping backup of {ws_id} - check if workspace exists.") - continue - - try: - sdk.catalog_workspace.store_declarative_workspace(ws_id, export_path) - store_declarative_filter_views(sdk, export_path, org_id, ws_id) - store_automations(api, export_path, org_id, ws_id) - - store_user_data_filters(user_data_filters, export_path, org_id, ws_id) - logger.info(f"Stored export for {ws_id}") - exported = True - except Exception as e: - logger.error(f"Skipping {ws_id}. {e.__class__.__name__} encountered: {e}") - - if not exported: - raise RuntimeError( - "None of the workspaces were exported. Check source file and their existence." - ) - - -def archive_gooddata_layouts_to_zip(folder: str) -> None: - """Archives the gooddata_layouts directory to a zip file.""" - target_subdir = "" - for subdir, dirs, files in os.walk(folder): - if DirNames.LAYOUTS in dirs: - target_subdir = os.path.join(subdir, dirs[0]) - if DirNames.LDM in dirs: - inner_layouts_dir = subdir + "/gooddata_layouts" - os.mkdir(inner_layouts_dir) - for dir in dirs: - shutil.move(os.path.join(subdir, dir), os.path.join(inner_layouts_dir)) - shutil.make_archive(target_subdir, "zip", subdir) - shutil.rmtree(target_subdir) - - -def create_client(args: BackupArgs) -> tuple[GoodDataSdk, GDApi]: - """Creates a GoodData client.""" - gdc_auth_token = os.environ.get("GDC_AUTH_TOKEN") - gdc_hostname = os.environ.get("GDC_HOSTNAME") - - if gdc_hostname and gdc_auth_token: - logger.info("Using GDC_HOSTNAME and GDC_AUTH_TOKEN envvars.") - sdk = GoodDataSdk.create(gdc_hostname, gdc_auth_token) - api = GDApi(gdc_hostname, gdc_auth_token) - return sdk, api - - profile_config, profile = args.profile_config, args.profile - if os.path.exists(profile_config): - 
logger.info(f"Using GoodData profile {profile} sourced from {profile_config}.") - sdk = GoodDataSdk.create_from_profile(profile, profile_config) - api = create_api_client_from_profile(profile, profile_config) - return sdk, api - - raise RuntimeError( - "No GoodData credentials provided. Please export required ENVVARS " - "(GDC_HOSTNAME, GDC_AUTH_TOKEN) or provide path to profile config." - ) - - -def split_to_batches( - workspaces_to_export: list[str], batch_size: Size -) -> list[BackupBatch]: - """Splits the list of workspaces to into batches of the specified size. - The batch is respresented as a list of workspace IDs. - Returns a list of batches (i.e. list of lists of IDs) - """ - list_of_batches = [] - while workspaces_to_export: - batch = BackupBatch(workspaces_to_export[: batch_size.size]) - workspaces_to_export = workspaces_to_export[batch_size.size :] - list_of_batches.append(batch) - - return list_of_batches - - -def process_batch( - sdk: GoodDataSdk, - api: GDApi, - org_id: str, - storage: BackupStorage, - batch: BackupBatch, - stop_event: threading.Event, - retry_count: int = 0, -) -> None: - """Processes a single batch of workspaces for backup. - If the batch processing fails, the function will wait - and retry with exponential backoff up to BackupSettings.MAX_RETRIES. - The base wait time is defined by BackupSettings.RETRY_DELAY. - """ - if stop_event.is_set(): - # If the stop_event flag is set, return. This will terminate the thread. - return - - try: - with tempfile.TemporaryDirectory() as tmpdir: - get_workspace_export(sdk, api, tmpdir, org_id, batch.list_of_ids) - - archive_gooddata_layouts_to_zip(str(Path(tmpdir, org_id))) - - storage.export(tmpdir, org_id) - - except Exception as e: - if stop_event.is_set(): - return - - elif retry_count < BackupSettings.MAX_RETRIES: - # Retry with exponential backoff until MAX_RETRIES. 
- next_retry = retry_count + 1 - wait_time = BackupSettings.RETRY_DELAY**next_retry - logger.info( - f"{e.__class__.__name__} encountered while processing a batch. " - + f"Retrying {next_retry}/{BackupSettings.MAX_RETRIES} in {wait_time} seconds..." - ) - - time.sleep(wait_time) - process_batch(sdk, api, org_id, storage, batch, stop_event, next_retry) - else: - # If the batch fails after MAX_RETRIES, raise the error. - logger.error(f"Batch failed: {e.__class__.__name__}: {e}") - raise - - -def process_batches_in_parallel( - sdk: GoodDataSdk, - api: GDApi, - org_id: str, - storage: BackupStorage, - batches: list[BackupBatch], -) -> None: - """ - Processes batches in parallel using concurrent.futures. Will stop the processing - if any one of the batches fails. - """ - - # Create a threading flag to control the threads that have already been started - stop_event = threading.Event() - - with ThreadPoolExecutor(max_workers=BackupSettings.MAX_WORKERS) as executor: - # Set the futures tasks. - futures = [] - for batch in batches: - futures.append( - executor.submit( - process_batch, sdk, api, org_id, storage, batch, stop_event - ) - ) - - # Process futures as they complete - for future in as_completed(futures): - try: - future.result() - except Exception: - # On failure, set the flag to True - signal running processes to stop. - stop_event.set() - - # Cancel unstarted threads. 
- for f in futures: - if not f.done(): - f.cancel() - - raise - - -def main(args: BackupArgs) -> None: - """Main function for the backup script.""" - sdk, api = create_client(args) - - org_id: str = sdk.catalog_organization.organization_id - - conf: BackupRestoreConfig = BackupRestoreConfig(str(args.conf)) - - storage_class: Type[BackupStorage] = get_storage(conf.storage_type) - storage: BackupStorage = storage_class(conf) - - loader = InputLoader(api, conf.api_page_size) - workspaces_to_export: list[str] = loader.get_ids_to_backup( - args.input_type, str(args.ws_csv) - ) +def backup(): + args: BackupArgs = Parser.parse_backup_args() - batches = split_to_batches(workspaces_to_export, conf.batch_size) + backup_restore_config = load_config_from_yaml(args.conf) - logger.info( - f"Exporting {len(workspaces_to_export)} workspaces in {len(batches)} batches." + backup_manager = create_client( + BackupManager, + args.profile_config, + args.profile, + config=backup_restore_config, ) - process_batches_in_parallel(sdk, api, org_id, storage, batches) - - -def backup(): - args: BackupArgs = Parser.parse_backup_args() - - try: - main(args) + backup_manager.logger.subscribe(logger) - logger.info("Backup completed!") - except Exception as e: - logger.error(f"Backup failed: {e}") + if args.input_type == "entire-organization": + backup_manager.backup_entire_organization() + elif args.input_type == "list-of-workspaces": + backup_manager.backup_workspaces(str(args.ws_csv)) + elif args.input_type == "list-of-parents": + backup_manager.backup_hierarchies(str(args.ws_csv)) + else: + raise ValueError(f"Unsupported input type: {args.input_type}") if __name__ == "__main__": diff --git a/scripts/restore.py b/scripts/restore.py index 8448662..a3af39c 100644 --- a/scripts/restore.py +++ b/scripts/restore.py @@ -1,567 +1,16 @@ # (C) 2025 GoodData Corporation -import abc -import csv -import json + import logging -import os -import tempfile -import traceback -import zipfile -from pathlib import 
Path -from typing import Any, Optional, Type, TypeAlias -import boto3 -import requests -import yaml -from gooddata_sdk.catalog.workspace.declarative_model.workspace.analytics_model.analytics_model import ( - CatalogDeclarativeAnalytics, -) -from gooddata_sdk.catalog.workspace.declarative_model.workspace.automation import ( - CatalogDeclarativeAutomation, -) -from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.ldm import ( - CatalogDeclarativeModel, -) -from gooddata_sdk.catalog.workspace.declarative_model.workspace.workspace import ( - CatalogDeclarativeFilterView, -) -from gooddata_sdk.sdk import GoodDataSdk +from gooddata_pipelines import RestoreManager, WorkspaceToRestore from utils.args.parser import Parser from utils.args.schemas import RestoreArgs -from utils.constants import DirNames +from utils.backup_restore_config import load_config_from_yaml from utils.logger import setup_logging - -BEARER_TKN_PREFIX = "Bearer" +from utils.utils import create_client, read_csv_file_to_dict setup_logging() -module_name = __file__.split(os.sep)[-1] -logger = logging.getLogger(module_name) - -GDWorkspace: TypeAlias = tuple[CatalogDeclarativeModel, CatalogDeclarativeAnalytics] - - -class GoodDataRestApiError(Exception): - """Wrapper for errors occurring from interaction with GD REST API.""" - - -class BackupRestoreError(Exception): - def __init__(self, cause: str = "Unknown"): - self.cause = cause - - -class BackupRestoreConfig: - def __init__(self, conf_path: str): - conf = self._load_conf(conf_path) - self.storage_type = conf["storage_type"] - self.storage = conf["storage"] - - @staticmethod - def _load_conf(path: str) -> dict[str, Any]: - with open(path, "r") as conf: - return yaml.safe_load(conf) - - -# TODO: storage logic also defined in backup.py, consider moving to utils -class BackupStorage(abc.ABC): - """ - Retrieves archive of backed up hierarchical export of workspace declaration. 
- - Implement this abstract base class for different kinds of storage providers. - """ - - def __init__(self, conf: BackupRestoreConfig): - return - - @abc.abstractmethod - def get_ws_declaration(self, target_path: str, local_target_path: Path) -> None: - raise NotImplementedError - - -class S3StorageConfig: - def __init__(self, storconf: dict[str, Any]): - self.bucket: str = storconf["bucket"] - suffix = "/" if not storconf["backup_path"].endswith("/") else "" - self.backup_path: str = storconf["backup_path"] + suffix - self.profile = storconf.get("profile", "default") - self.aws_access_key_id = storconf.get("aws_access_key_id") - self.aws_secret_access_key = storconf.get("aws_secret_access_key") - self.aws_default_region = storconf.get("aws_default_region") - - -class S3Storage(BackupStorage): - """ - Retrieves archive of backed up hierarchical export of workspace declaration from S3. - """ - - def __init__(self, conf: BackupRestoreConfig): - self._config = S3StorageConfig(conf.storage) - self._session = self._create_boto_session(self._config) - self._api = self._session.resource("s3") - self._bucket = self._api.Bucket(self._config.bucket) - self._validate_backup_path() - - @staticmethod - def _create_boto_session(config: S3StorageConfig) -> boto3.Session: - if config.aws_access_key_id and config.aws_secret_access_key: - if not config.aws_default_region: - logger.warning("No AWS region specified. Defaulting to us-east-1.") - try: - return boto3.Session( - aws_access_key_id=config.aws_access_key_id, - aws_secret_access_key=config.aws_secret_access_key, - region_name=config.aws_default_region, - ) - except Exception: - logger.warning( - "Failed to create boto3 session with supplied credentials. Falling back to profile..." - ) - try: - return boto3.Session(profile_name=config.profile) - except Exception: - logger.warning( - f'AWS profile "{config.profile}" not found. Trying other fallback methods...' 
- ) - - return boto3.Session() - - def _validate_backup_path(self) -> None: - """Validates if backup path exists in the S3 bucket.""" - objects_filter = self._bucket.objects.filter(Prefix=self._config.backup_path) - - try: - objects = list(objects_filter) - except Exception as e: - raise RuntimeError(f"Error raised while validating s3 config. Error: {e}") - - if len(objects) == 0: - raise RuntimeError("Provided s3 backup_path does not exist. Exiting...") - - def get_ws_declaration(self, target_path: str, local_target_path: Path) -> None: - """Retrieves workspace declaration from S3 bucket.""" - s3_backup_path = self._config.backup_path - target_s3_prefix = f"{s3_backup_path}{target_path}" - - objs_found = list(self._bucket.objects.filter(Prefix=target_s3_prefix)) - - # Remove the included directory (which equals prefix) on hit - objs_found = objs_found[1:] if len(objs_found) > 0 else objs_found - - if not objs_found: - logger.error(f"No target backup found for {target_s3_prefix}.") - raise BackupRestoreError(f"No target found for {target_s3_prefix}") - - if len(objs_found) > 1: - logger.warning( - f"Multiple backups found at {target_s3_prefix}." - " Continuing with the first one, ignoring the rest..." 
- ) - - s3_obj = objs_found[0] - self._bucket.download_file(s3_obj.key, str(local_target_path)) - - -MaybeResponse: TypeAlias = Optional[requests.Response] - - -class GDApi: - # TODO: also defined in utils, consider importing from there - def __init__(self, host: str, api_token: str, headers: dict[str, Any] = {}): - self.endpoint = self._handle_endpoint(host) - self.api_token = api_token - self.headers = headers - self.wait_api_time = 10 - - @staticmethod - def _handle_endpoint(host: str) -> str: - """Ensures that the endpoint URL is properly formatted.""" - return f"{host}api/v1" if host[-1] == "/" else f"{host}/api/v1" - - def put( - self, path: str, request: dict[str, Any], ok_code: int = 200 - ) -> requests.Response: - """Sends a PUT request to the GoodData API.""" - kwargs = self._prepare_request(path) - kwargs["headers"]["Content-Type"] = "application/json" - kwargs["json"] = request - logger.debug(f"PUT request: {json.dumps(request)}") - response = requests.put(**kwargs) - resolved_response = self._resolve_return_code( - response, ok_code, kwargs["url"], "RestApi.put" - ) - assert resolved_response is not None - return resolved_response - - def _prepare_request(self, path: str, params=None) -> dict[str, Any]: - """Prepares the request to be sent to the GoodData API.""" - kwargs: dict[str, Any] = { - "url": f"{self.endpoint}/{path}", - "headers": self.headers.copy(), - } - if params: - kwargs["params"] = params - if self.api_token: - kwargs["headers"]["Authorization"] = f"{BEARER_TKN_PREFIX} {self.api_token}" - else: - raise RuntimeError( - "Token required for authentication against GD API is missing." 
- ) - - return kwargs - - @staticmethod - def _resolve_return_code( - response, ok_code: int, url, method, not_found_code: Optional[int] = None - ) -> MaybeResponse: - """Resolves the return code of the response.""" - if response.status_code == ok_code: - logger.debug(f"{method} to {url} succeeded") - return response - if not_found_code and response.status_code == not_found_code: - logger.debug(f"{method} to {url} failed - target not found") - return None - raise GoodDataRestApiError( - f"{method} to {url} failed - " - f"response_code={response.status_code} message={response.text}" - ) - - -def read_targets_from_csv(csv_path: str) -> dict[str, str]: - """Reads the csv file with workspace IDs and paths to backups.""" - # TODO - handling of csv files with and without headers - # TODO - handling csv files with unsupported structure/schema - ws_paths: dict[str, str] = {} - with open(csv_path, "r") as f: - reader = csv.reader(f, skipinitialspace=True) - next(reader) # Skip header - for row in reader: - ws_id, ws_path = row - - if ws_paths.get(ws_id) is not None: - logger.warning( - f'Duplicate backup targets for ws_id "{ws_id}" found. ' - f'Overwriting the target at "{ws_paths[ws_id]}" with "{ws_path}".' - ) - - ws_paths[ws_id] = ws_path - - return ws_paths - - -def validate_targets(sdk: GoodDataSdk, ws_paths: dict[str, str]) -> None: - """Validates the targets provided. - Since for now we don't support restore of deleted backups, - we can let the user know in advance about unknown IDs. - """ - ws_list = sdk.catalog_workspace.list_workspaces() - available_ids = {ws.id for ws in ws_list} - target_ids = set(ws_paths.keys()) - - unknown_ids = target_ids - available_ids - if unknown_ids: - logger.error( - "Unknown IDs specified in the input csv file. " - f"These will be ignored. The unknown IDs are: {unknown_ids}." 
- ) - - for ws_id in unknown_ids: - ws_paths.pop(ws_id) - - -def get_storage(storage_type: str) -> Type[BackupStorage]: - """Factory method for creating storage providers.""" - match storage_type: - case "s3": - return S3Storage - case _: - raise RuntimeError(f'Unsupported storage type "{storage_type}".') - - -class RestoreWorker: - def __init__( - self, - sdk: GoodDataSdk, - api: GDApi, - storage: BackupStorage, - ws_paths: dict[str, str], - ): - self._sdk = sdk - self._api = api - self._storage = storage - self._ws_paths = ws_paths - self.org_id = sdk.catalog_organization.organization_id - - def _get_ws_declaration(self, ws_path: str, target: Path) -> None: - """Fetches the backup of workspace declaration from storage provider.""" - try: - self._storage.get_ws_declaration(ws_path, target) - except Exception as e: - logger.error("Failed to fetch restore backup for workspace.") - raise BackupRestoreError(type(e).__name__) - - @staticmethod - def _extract_zip_archive(target: Path, tempdir_path: Path) -> None: - """Extracts the backup from zip archive.""" - try: - with zipfile.ZipFile(target, "r") as zip_ref: - zip_ref.extractall(tempdir_path) - except Exception as e: - logger.error("Failed to extract backup from zip archive.") - raise BackupRestoreError(type(e).__name__) - - def _load_workspace_layout(self, src_path: Path) -> GDWorkspace: - """Loads the workspace layout from the backup.""" - try: - sdk_catalog = self._sdk.catalog_workspace_content - - ldm = sdk_catalog.load_ldm_from_disk(src_path) - am = sdk_catalog.load_analytics_model_from_disk(src_path) - - return ldm, am - except Exception as e: - logger.error("Failed to load workspace declaration.") - raise BackupRestoreError(type(e).__name__) - - @staticmethod - def _convert_udf_files_to_api_body(src_path: Path) -> dict: - """Converts UDF files to API body.""" - user_data_filters: dict = {"userDataFilters": []} - user_data_filters_folder = os.path.join(src_path, DirNames.UDF) - for filename in 
os.listdir(user_data_filters_folder): - f = os.path.join(user_data_filters_folder, filename) - with open(f, "r") as file: - user_data_filter = yaml.safe_load(file) - user_data_filters["userDataFilters"].append(user_data_filter) - - return user_data_filters - - def _load_user_data_filters(self, src_path: Path) -> dict: - try: - return self._convert_udf_files_to_api_body(src_path) - except Exception as e: - logger.error("Failed to retrieve contents of user_data_filters folder.") - raise BackupRestoreError(type(e).__name__) - - def _load_and_put_filter_views(self, ws_id: str, src_path: Path) -> None: - """Loads and puts filter views into GoodData workspace.""" - filter_views: list[CatalogDeclarativeFilterView] = [] - if not (src_path / "filter_views").exists(): - # Skip if the filter_views directory does not exist - return - - for file in Path(src_path / "filter_views").iterdir(): - filter_view_content: dict[str, Any] = dict(self._safe_load_yaml(file)) - filter_view: CatalogDeclarativeFilterView = ( - CatalogDeclarativeFilterView.from_dict(filter_view_content) - ) - filter_views.append(filter_view) - - if filter_views: - self._sdk.catalog_workspace.put_declarative_filter_views( - ws_id, filter_views - ) - - def _load_and_post_automations(self, ws_id: str, source_path: Path) -> None: - """Loads automations from specified json file and creates them in the workspace.""" - # Load automations from JSON - path_to_json: Path = Path(source_path, "automations", "automations.json") - - if not (source_path.exists() and path_to_json.exists()): - # Both the folder and the file must exist, otherwise skip - return - - # Delete all automations from the workspace and restore the automations from the backup. 
- self._delete_all_automations(ws_id) - - data: dict = self._load_json(path_to_json) - automations: list[dict] = data["data"] - - for automation in automations: - self._post_automation(ws_id, automation) - - def _delete_all_automations(self, ws_id: str) -> None: - """Deletes all automations in the workspace.""" - automations: list[CatalogDeclarativeAutomation] = ( - self._sdk.catalog_workspace.get_declarative_automations(ws_id) - ) - for automation in automations: - requests.delete( - f"{self._api.endpoint}/entities/workspaces/{ws_id}/automations/{automation.id}", - headers={ - "Authorization": f"{BEARER_TKN_PREFIX} {self._api.api_token}", - "Content-Type": "application/vnd.gooddata.api+json", - }, - ) - - def _post_automation(self, ws_id: str, automation: dict) -> None: - """Posts a scheduled export to the workspace.""" - attributes: dict = automation["attributes"] - relationships: dict = automation["relationships"] - id: str = automation["id"] - - if attributes.get("schedule"): - if attributes["schedule"].get("cronDescription"): - # the cron description attribute is causing a 500 error ("No mapping found...") - del attributes["schedule"]["cronDescription"] - - response: requests.Response = requests.post( - f"{self._api.endpoint}/entities/workspaces/{ws_id}/automations", - headers={ - "Authorization": f"{BEARER_TKN_PREFIX} {self._api.api_token}", - "Content-Type": "application/vnd.gooddata.api+json", - }, - data=json.dumps( - { - "data": { - "attributes": attributes, - "id": id, - "type": "automation", - "relationships": relationships, - } - }, - ), - ) - - if response.status_code != 201: - logger.error( - f"Failed to post automation ({response.status_code}): {response.text}" - ) - - def _safe_load_yaml(self, path: Path) -> Any: - """Safely loads a yaml file at the given path.""" - with open(path, "r") as f: - return yaml.safe_load(f) - - def _load_json(self, path: Path) -> Any: - """Loads a json file at the given path.""" - with open(path, "r") as f: - return 
json.load(f) - - @staticmethod - def _check_workspace_is_valid(src_path: Path) -> None: - """Checks if the workspace layout is valid.""" - # NOTE - this is a weaker, temporary validation. - # Should be replaced upon SDK version bump. - if not src_path.exists() or not src_path.is_dir(): - logger.error( - "Invalid source path found upon backup fetch. " - f"Got {src_path}. " - "Check if target zip contains gooddata_layouts directory." - ) - raise BackupRestoreError("Invalid source path upon load.") - - children = list(src_path.iterdir()) - am_path = src_path / DirNames.AM - ldm_path = src_path / DirNames.LDM - udf_path = src_path / DirNames.UDF - - if ( - am_path not in children - or ldm_path not in children - or udf_path not in children - ): - logger.error( - "LDM or AM directory missing in the workspace hierarchy. " - "Check if gooddata_layouts contains " - f"{DirNames.AM}, {DirNames.LDM} and {DirNames.UDF} directories." - ) - raise BackupRestoreError("LDM or AM directory missing.") - - def _put_workspace_layout(self, ws_id: str, workspace: GDWorkspace) -> None: - """Puts the workspace layout into GoodData.""" - ldm, am = workspace - try: - sdk_catalog = self._sdk.catalog_workspace_content - - sdk_catalog.put_declarative_ldm(ws_id, ldm) - sdk_catalog.put_declarative_analytics_model(ws_id, am) - - except Exception as e: - logger.error("Failed to put workspace into GoodData.") - raise BackupRestoreError(type(e).__name__) - - def _put_user_data_filters(self, ws_id: str, user_data_filters: dict): - """Puts the user data filters into GoodData workspace.""" - try: - self._api.put( - f"layout/workspaces/{ws_id}/userDataFilters", user_data_filters, 204 - ) - except GoodDataRestApiError as e: - logger.error(f"Failed to put user data filters into {ws_id}") - raise BackupRestoreError(type(e).__name__) - - def _restore_backup(self, ws_id: str, tempdir: str) -> None: - """Restores the backup of a workspace.""" - ws_path = self._ws_paths[ws_id] - tempdir_path = Path(tempdir) - 
zip_target = tempdir_path / f"{DirNames.LAYOUTS}.zip" - src_path = tempdir_path / DirNames.LAYOUTS - - try: - self._get_ws_declaration(ws_path, zip_target) - self._extract_zip_archive(zip_target, tempdir_path) - self._check_workspace_is_valid(src_path) - workspace = self._load_workspace_layout(src_path) - user_data_filters = self._load_user_data_filters(src_path) - self._put_workspace_layout(ws_id, workspace) - self._put_user_data_filters(ws_id, user_data_filters) - self._load_and_put_filter_views(ws_id, src_path) - self._load_and_post_automations(ws_id, src_path) - logger.info(f"Finished backup restore of {ws_id} from {ws_path}.") - except BackupRestoreError as e: - logger.error( - f"Failed to restore backup of {ws_id} from {ws_path}. " - f"Error caused by {e.cause}." - ) - trace = traceback.format_exc() - logger.debug( - f"Attempt to restore backup raised following error: {e.cause}. " - f"Traceback:\n{trace}" - ) - - def incremental_restore(self): - """Restores the backups of workspaces incrementally.""" - for ws_id in self._ws_paths.keys(): - with tempfile.TemporaryDirectory() as tempdir: - self._restore_backup(ws_id, tempdir) - - -def create_api_client_from_profile(profile: str, profile_config: Path) -> GDApi: - """Creates a GoodData API client from a profile.""" - with open(profile_config, "r") as file: - config = yaml.safe_load(file) - - if profile not in config: - raise RuntimeError( - f'Specified profile name "{profile}" not found in "{profile_config}".' 
- ) - - profile_conf = config[profile] - hostname, token = profile_conf["host"], profile_conf["token"] - return GDApi(hostname, token) - - -def create_client(args: RestoreArgs) -> tuple[GoodDataSdk, GDApi]: - """Creates GoodData SDK and API clients.""" - gdc_auth_token = os.environ.get("GDC_AUTH_TOKEN") - gdc_hostname = os.environ.get("GDC_HOSTNAME") - - if gdc_hostname and gdc_auth_token: - logger.info("Using GDC_HOSTNAME and GDC_AUTH_TOKEN envvars.") - sdk = GoodDataSdk.create(gdc_hostname, gdc_auth_token) - api = GDApi(gdc_hostname, gdc_auth_token) - return sdk, api - - profile_config, profile = args.profile_config, args.profile - if os.path.exists(profile_config): - logger.info(f"Using GoodData profile {profile} sourced from {profile_config}.") - sdk = GoodDataSdk.create_from_profile(profile, profile_config) - api = create_api_client_from_profile(profile, profile_config) - return sdk, api - - raise RuntimeError( - "No GoodData credentials provided. Please export required ENVVARS " - "(GDC_HOSTNAME, GDC_AUTH_TOKEN) or provide path to GD profile config." 
- ) +logger = logging.getLogger(__name__) def restore(): @@ -569,20 +18,32 @@ def restore(): args: RestoreArgs = Parser.parse_restore_args() - sdk, api = create_client(args) + backup_restore_config = load_config_from_yaml(args.conf) - conf = BackupRestoreConfig(str(args.conf)) + restore_manager = create_client( + RestoreManager, + args.profile_config, + args.profile, + config=backup_restore_config, + ) - cls_storage: type[BackupStorage] = get_storage(conf.storage_type) - storage = cls_storage(conf) + # Subscribe to logs + restore_manager.logger.subscribe(logger) - ws_paths = read_targets_from_csv(str(args.ws_csv)) - validate_targets(sdk, ws_paths) + # Load workspaces from CSV + workspaces = read_csv_file_to_dict(args.ws_csv) - restore_worker = RestoreWorker(sdk, api, storage, ws_paths) + # Validate data from CSV input + workspaces_to_restore = [ + WorkspaceToRestore( + id=workspace["workspace_id"], + path=workspace["path"], + ) + for workspace in workspaces + ] - logger.info("Starting incremental backup restore based on target csv file...") - restore_worker.incremental_restore() + # Restore workspaces + restore_manager.restore(workspaces_to_restore) if __name__ == "__main__": diff --git a/scripts/utils/backup_restore_config.py b/scripts/utils/backup_restore_config.py new file mode 100644 index 0000000..db61df4 --- /dev/null +++ b/scripts/utils/backup_restore_config.py @@ -0,0 +1,58 @@ +from pathlib import Path + +import yaml +from gooddata_pipelines import ( + BackupRestoreConfig, + LocalStorageConfig, + S3StorageConfig, + StorageType, +) +from gooddata_pipelines.backup_and_restore.constants import BackupSettings + + +def load_config_from_yaml(path: Path) -> BackupRestoreConfig: + """Loads the backup and restore configuration from a YAML file.""" + storage_config: LocalStorageConfig | S3StorageConfig + + with open(path, "r") as file: + contents = yaml.safe_load(file) + + if "storage_type" not in contents: + raise ValueError("storage_type is required in the 
configuration file") + + if contents["storage_type"] == "local": + storage_type = StorageType.LOCAL + else: + storage_type = StorageType.S3 + + if storage_type == StorageType.LOCAL: + local_storage = contents.get("storage", {}) + if not local_storage: + backup_path = "local_backups" + else: + backup_path = local_storage.get("backup_path", "local_backups") + + storage_config = LocalStorageConfig(backup_path=backup_path) + else: + storage_config = S3StorageConfig( + backup_path=contents["storage"]["backup_path"], + bucket=contents["storage"]["bucket"], + profile=contents["storage"].get("profile"), + aws_access_key_id=contents["storage"].get("aws_access_key_id"), + aws_secret_access_key=contents["storage"].get("aws_secret_access_key"), + aws_default_region=contents["storage"].get( + "aws_default_region", "us-east-1" + ), + ) + + config = BackupRestoreConfig( + storage_type=storage_type, + storage=storage_config, + api_page_size=contents.get("api_page_size", BackupSettings.API.PAGE_SIZE), + batch_size=contents.get("batch_size", BackupSettings.API.BATCH_SIZE), + api_calls_per_second=contents.get( + "api_calls_per_second", BackupSettings.API.CALLS_PER_SECOND + ), + ) + + return config diff --git a/scripts/utils/backup_utils/__init__.py b/scripts/utils/backup_utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/scripts/utils/backup_utils/input_loader.py b/scripts/utils/backup_utils/input_loader.py deleted file mode 100644 index 90a12d9..0000000 --- a/scripts/utils/backup_utils/input_loader.py +++ /dev/null @@ -1,199 +0,0 @@ -# (C) 2025 GoodData Corporation -import csv -import logging -from dataclasses import dataclass -from typing import Iterator - -from utils.gd_api import ( # type: ignore[import] - API_VERSION, - GDApi, - GoodDataRestApiError, - MaybeResponse, -) -from utils.models.batch import Size # type: ignore[import] -from utils.models.workspace_response import ( # type: ignore[import] - Workspace, - WorkspaceResponse, -) - -logger = 
logging.getLogger(__name__) - - -class InputLoader: - """Class to handle loading and parsing the input data.""" - - api_client: GDApi - base_workspace_endpoint: str - hierarchy_endpoint: str - all_workspaces_endpoint: str - - def __init__(self, api_client: GDApi, page_size: Size) -> None: - self.api_client = api_client - self.page_size = page_size.size - self.set_endpoints() - - def set_endpoints(self) -> None: - """Sets the hierarchy endpoint for the API client.""" - self.base_workspace_endpoint = "/api/v1/entities/workspaces" - self.hierarchy_endpoint = ( - f"{self.base_workspace_endpoint}?" - + "filter=parent.id=={parent_id}" - + f"&include=parent&page=0&size={self.page_size}&sort=name,asc&metaInclude=page,hierarchy" - ) - self.all_workspaces_endpoint = f"{self.base_workspace_endpoint}?page=0&size={self.page_size}&sort=name,asc&metaInclude=page" - - @dataclass - class _ProcessDataOutput: - workspace_ids: list[str] - sub_parents: list[str] | None = None - - @staticmethod - def read_csv_input_for_backup(file_path: str) -> list[str]: - """Reads the input CSV file and returns its content from the first column as a list of string.""" - - with open(file_path) as csv_file: - reader: Iterator[list[str]] = csv.reader(csv_file, skipinitialspace=True) - - try: - # Skip the header - headers = next(reader) - - if len(headers) > 1: - raise ValueError( - "Input file contains more than one column. Please check the input and try again." 
- ) - - except StopIteration: - # Raise an error if the iterator is empty - raise ValueError("No content found in the CSV file.") - - # Read the content - content = [row[0] for row in reader] - - # If the content is empty (no rows), raise an error - if not content: - raise ValueError("No workspaces found in the CSV file.") - - return content - - def fetch_page(self, url: str) -> WorkspaceResponse: - """Fetch a page of workspaces.""" - - # Separate the API path from the URL so that it can be fed to the GDApi class - endpoint: str = url.split(f"api/{API_VERSION}")[1] - response: MaybeResponse = self.api_client.get(endpoint, None) - if response: - return WorkspaceResponse(**response.json()) - else: - raise GoodDataRestApiError( - f"Failed to fetch data from the API. URL: {endpoint}" - ) - - @staticmethod - def process_data(data: list[Workspace]) -> _ProcessDataOutput: - """Extract children and sub-parents from workspace data.""" - children: list[str] = [] - sub_parents: list[str] = [] - - for workspace in data: - # append child workspace IDs - children.append(workspace.id) - - # if hierarchy is present and has children, append child workspace ID to sub_parents - if workspace.meta and workspace.meta.hierarchy: - if workspace.meta.hierarchy.children_count > 0: - sub_parents.append(workspace.id) - return InputLoader._ProcessDataOutput(children, sub_parents) - - @staticmethod - def log_paging_progress(response: WorkspaceResponse) -> None: - """Log the progress of paging through API responses if paginatino data is present""" - current_page: int | None - total_pages: int | None - - if response.meta.page: - current_page = response.meta.page.number + 1 - total_pages = response.meta.page.total_pages - else: - current_page = None - total_pages = None - - if current_page and total_pages: - logger.info(f"Fetched page: {current_page} of {total_pages}") - - def _paginate(self, url: str | None): - result: list[InputLoader._ProcessDataOutput] = [] - while url: - response: 
WorkspaceResponse = self.fetch_page(url) - self.log_paging_progress(response) - result.append(self.process_data(response.data)) - url = response.links.next - - return result - - def get_hierarchy(self, parent_id: str) -> list[str]: - """Returns a list of workspace IDs in the hierarchy.""" - logger.info(f"Fetching children of {parent_id}") - url = self.hierarchy_endpoint.format(parent_id=parent_id) - - all_children, sub_parents = [], [] - - results: list[InputLoader._ProcessDataOutput] = self._paginate(url) - - for result in results: - all_children.extend(result.workspace_ids) - if result.sub_parents: - sub_parents.extend(result.sub_parents) - - for subparent in sub_parents: - all_children += self.get_hierarchy(subparent) - - if not all_children: - logger.warning( - f"No child workspaces found for parent workspace ID: {parent_id}" - ) - - return all_children - - def get_all_workspaces(self) -> list[str]: - """Returns a list of all workspace IDs in the organization.""" - # TODO: can be optimized - requests can be sent asynchronously. - # Use the total number of pages to calculate the number of requests - # to be sent. Use semaphore or otherwise limit the number of concurrent - # requests to avoid putting too much load on the server. 
- logger.info("Fetching all workspaces") - url = self.all_workspaces_endpoint - - all_workspaces: list[str] = [] - - results: list[InputLoader._ProcessDataOutput] = self._paginate(url) - - for result in results: - all_workspaces.extend(result.workspace_ids) - - if not all_workspaces: - logger.warning("No workspaces found in the organization.") - - return all_workspaces - - def get_ids_to_backup(self, input_type: str, path_to_csv: str) -> list[str]: - """Returns the list of workspace IDs to back up based on the input type.""" - if input_type == "list-of-workspaces": - return self.read_csv_input_for_backup(path_to_csv) - - else: - if input_type == "list-of-parents": - list_of_parents = self.read_csv_input_for_backup(path_to_csv) - list_of_children: list[str] = [] - - for parent in list_of_parents: - list_of_children.extend(self.get_hierarchy(parent)) - - # Include the parent workspaces in the backup - return list_of_parents + list_of_children - - if input_type == "entire-organization": - list_of_workspaces = self.get_all_workspaces() - return list_of_workspaces - - raise RuntimeError("Invalid input type provided.") diff --git a/scripts/utils/constants.py b/scripts/utils/constants.py deleted file mode 100644 index 0dc385c..0000000 --- a/scripts/utils/constants.py +++ /dev/null @@ -1,54 +0,0 @@ -import datetime -from dataclasses import dataclass -from pathlib import Path - -from gooddata_sdk._version import __version__ as sdk_version - - -@dataclass(frozen=True) -class GoodDataProfile: - """ - Default path to the GoodData profile file. 
- """ - - FILE_NAME = "profiles.yaml" - DIRECTORY = ".gooddata" - PROFILE_PATH = Path.home() / DIRECTORY / FILE_NAME - - -@dataclass(frozen=True) -class DirNames: - """ - Folder names used in the SDK backup process: - - LAYOUTS - GoodData Layouts - - LDM - Logical Data Model - - AM - Analytics Model - - UDF - User Data Filters - """ - - LAYOUTS = "gooddata_layouts" - LDM = "ldm" - AM = "analytics_model" - UDF = "user_data_filters" - - -@dataclass(frozen=True) -class ConcurrencyDefaults: - MAX_WORKERS = 2 - DEFAULT_BATCH_SIZE = 100 - - -@dataclass(frozen=True) -class ApiDefaults: - DEFAULT_PAGE_SIZE = 100 - - -@dataclass(frozen=True) -class BackupSettings(ConcurrencyDefaults, ApiDefaults): - MAX_RETRIES = 3 - RETRY_DELAY = 5 # seconds - TIMESTAMP_SDK_FOLDER = ( - str(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")) - + "-" - + sdk_version.replace(".", "_") - ) diff --git a/scripts/utils/gd_api.py b/scripts/utils/gd_api.py deleted file mode 100644 index 393a1fa..0000000 --- a/scripts/utils/gd_api.py +++ /dev/null @@ -1,91 +0,0 @@ -# (C) 2025 GoodData Corporation - -import json -import logging -from typing import Any, TypeAlias - -import requests - -logger = logging.getLogger(__name__) - -API_VERSION = "v1" - -MaybeResponse: TypeAlias = requests.Response | None - - -class GoodDataRestApiError(Exception): - """Wrapper for errors occurring from interaction with GD REST API.""" - - -class GDApi: - """Wrapper for GoodData REST API client.""" - - # TODO: also defined in restore.py, consider moving to utils - def __init__(self, host: str, api_token: str, headers=None): - self.endpoint = self._handle_endpoint(host) - self.api_token = api_token - self.headers = headers if headers else {} - self.wait_api_time = 10 - - @staticmethod - def _handle_endpoint(host: str) -> str: - """Ensures that the endpoint URL is correctly formatted.""" - return ( - f"{host}api/{API_VERSION}" - if host[-1] == "/" - else f"{host}/api/{API_VERSION}" - ) - - def get( - self, - path: str, - params, 
- ok_code: int = 200, - not_found_code: int = 404, - ) -> MaybeResponse: - """Sends a GET request to the GoodData API.""" - kwargs = self._prepare_request(path, params) - logger.debug(f"GET request: {json.dumps(kwargs)}") - response = requests.get(**kwargs) - return self._resolve_return_code( - response, ok_code, kwargs["url"], "RestApi.get", not_found_code - ) - - def _prepare_request(self, path: str, params=None) -> dict[str, Any]: - """Prepares the request to be sent to the GoodData API.""" - kwargs: dict[str, Any] = { - "url": f"{self.endpoint}/{path}", - "headers": self.headers.copy(), - } - if params: - kwargs["params"] = params - if self.api_token: - kwargs["headers"]["Authorization"] = f"Bearer {self.api_token}" - else: - raise RuntimeError( - "Token required for authentication against GD API is missing." - ) - # TODO - Currently no credentials validation - # TODO - do we also support username+pwd auth? Or do we enforce token only? - # else: - # kwargs['auth'] = (self.user, self.password) if self.user is not None else None # noqa - return kwargs - - @staticmethod - def _resolve_return_code( - response, ok_code: int, url, method, not_found_code: int | None = None - ) -> MaybeResponse: - """Resolves the return code of the response.""" - # TODO: this can be simplified, it would be more transparent to evaluate the - # requests.Response.status_code directly in each particular use case rather than - # checking if a "MaybeResponse" type is None or not. 
- if response.status_code == ok_code: - logger.debug(f"{method} to {url} succeeded") - return response - if not_found_code and response.status_code == not_found_code: - logger.debug(f"{method} to {url} failed - target not found") - return None - raise GoodDataRestApiError( - f"{method} to {url} failed - " - f"response_code={response.status_code} message={response.text}" - ) diff --git a/scripts/utils/models/__init__.py b/scripts/utils/models/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/scripts/utils/models/batch.py b/scripts/utils/models/batch.py deleted file mode 100644 index 9bca78e..0000000 --- a/scripts/utils/models/batch.py +++ /dev/null @@ -1,15 +0,0 @@ -from dataclasses import dataclass -from typing import Annotated - -from pydantic import BaseModel, Field # type: ignore[import] - - -@dataclass -class BackupBatch: - list_of_ids: list[str] - - -class Size(BaseModel): - """Model to ensure valid batch or page size, i.e., integer greater than 0.""" - - size: Annotated[int, Field(gt=0, description="Batch size must be greater than 0")] diff --git a/scripts/utils/models/workspace_response.py b/scripts/utils/models/workspace_response.py deleted file mode 100644 index 9a7ae99..0000000 --- a/scripts/utils/models/workspace_response.py +++ /dev/null @@ -1,49 +0,0 @@ -from pydantic import ( # type: ignore[import] # missing type stub - BaseModel, - ConfigDict, -) -from pydantic.alias_generators import ( # type: ignore[import] # missing type stub - to_camel, -) - - -class Page(BaseModel): - size: int - total_elements: int - total_pages: int - number: int - - model_config = ConfigDict( - alias_generator=to_camel, - populate_by_name=True, - ) - - -class Hierarchy(BaseModel): - children_count: int - - model_config = ConfigDict( - alias_generator=to_camel, - populate_by_name=True, - ) - - -class Meta(BaseModel): - page: Page | None = None - hierarchy: Hierarchy | None = None - - -class Workspace(BaseModel): - id: str - meta: Meta | None = None - - 
-class Links(BaseModel): - self: str - next: str | None = None - - -class WorkspaceResponse(BaseModel): - data: list[Workspace] - links: Links - meta: Meta diff --git a/scripts/utils/utils.py b/scripts/utils/utils.py index 53f2d1d..7d2fb49 100644 --- a/scripts/utils/utils.py +++ b/scripts/utils/utils.py @@ -5,62 +5,119 @@ import logging import os from pathlib import Path -from typing import Any, Protocol, Type, TypeVar, cast +from typing import Any, Generic, Protocol, Type, TypeVar, overload logger = logging.getLogger(__name__) +TOKEN_ENV_VAR_NAME = "GDC_AUTH_TOKEN" +HOSTNAME_ENV_VAR_NAME = "GDC_HOSTNAME" -class PipelinesClient(Protocol): + +def read_csv_file_to_dict( + file_path: Path, delimiter: str = ",", quotechar: str = '"' +) -> list[dict[str, Any]]: + """Read a CSV file and return its content as a list of dictionaries. + + Args: + file_path (Path): The path to the CSV file. + delimiter (str): The delimiter used in the CSV file. + quotechar (str): The quote character used in the CSV file. + Returns: + list[dict[str, Any]]: A list of dictionaries where each dictionary represents + a row in the CSV file, with keys as column headers and values as row values. 
+ """ + with open(file_path, "r", encoding="utf-8") as file: + return list(csv.DictReader(file, delimiter=delimiter, quotechar=quotechar)) + + +class SimpleClient(Protocol): """Protocol for GoodData Pipelines clients (Provisioners, Managers...).""" @classmethod - def create(cls, host: str, token: str) -> "PipelinesClient": + def create(cls, host: str, token: str) -> "SimpleClient": + pass + + @classmethod + def create_from_profile(cls, profile: str, profiles_path: Path) -> "SimpleClient": + pass + + +ConfigType = TypeVar("ConfigType") + + +class ConfiguredClient(Protocol, Generic[ConfigType]): + """Protocol for clients requiring a configuration object.""" + + @classmethod + def create( + cls, config: ConfigType, host: str, token: str + ) -> "ConfiguredClient[ConfigType]": pass @classmethod def create_from_profile( - cls, profile: str, profiles_path: Path - ) -> "PipelinesClient": + cls, config: ConfigType, profile: str, profiles_path: Path + ) -> "ConfiguredClient[ConfigType]": pass -T = TypeVar("T", bound="PipelinesClient") +SimpleT = TypeVar("SimpleT", bound="SimpleClient") +ConfiguredT = TypeVar("ConfiguredT", bound="ConfiguredClient[Any]") -def read_csv_file_to_dict( - file_path: Path, delimiter: str = ",", quotechar: str = '"' -) -> list[dict[str, Any]]: - """Read a CSV file and return its content as a list of dictionaries. +@overload +def create_client( + client_type: Type[SimpleT], profile_config: Path, profile: str +) -> SimpleT: ... - Args: - file_path (str): The path to the CSV file. - Returns: - list[dict[str, str]]: A list of dictionaries where each dictionary represents - a row in the CSV file, with keys as column headers and values as row values. - """ - with open(file_path, "r", encoding="utf-8") as file: - return list(csv.DictReader(file, delimiter=delimiter, quotechar=quotechar)) +@overload +def create_client( + client_type: Type[ConfiguredT], + profile_config: Path, + profile: str, + config: ConfigType, +) -> ConfiguredT: ... 
-def create_client(client_type: Type[T], profile_config: Path, profile: str) -> T: - """Creates GoodData Pipelines client of given type.""" - gdc_auth_token = os.environ.get("GDC_AUTH_TOKEN") - gdc_hostname = os.environ.get("GDC_HOSTNAME") + +def create_client( + client_type: Type[Any], + profile_config: Path, + profile: str, + config: Any | None = None, +) -> Any: + """Creates a GoodData Pipelines client. + + Depending on whether "config" is provided, the function will call the + appropriate classmethod on the provided client type. + """ + gdc_auth_token = os.environ.get(TOKEN_ENV_VAR_NAME) + gdc_hostname = os.environ.get(HOSTNAME_ENV_VAR_NAME) if gdc_hostname and gdc_auth_token: - logger.info("Using GDC_HOSTNAME and GDC_AUTH_TOKEN envvars.") - return cast(T, client_type.create(host=gdc_hostname, token=gdc_auth_token)) + if not gdc_hostname.strip() or not gdc_auth_token.strip(): + raise ValueError( + f"Environment variables {HOSTNAME_ENV_VAR_NAME} and " + f"{TOKEN_ENV_VAR_NAME} cannot be empty strings." + ) + logger.info(f"Using {HOSTNAME_ENV_VAR_NAME} and {TOKEN_ENV_VAR_NAME} envvars.") + if config is None: + return client_type.create(host=gdc_hostname, token=gdc_auth_token) + return client_type.create( + config=config, host=gdc_hostname, token=gdc_auth_token + ) if os.path.exists(profile_config): logger.info(f"Using GoodData profile {profile} sourced from {profile_config}.") - return cast( - T, - client_type.create_from_profile( + if config is None: + return client_type.create_from_profile( profile=profile, profiles_path=profile_config - ), + ) + return client_type.create_from_profile( + config=config, profile=profile, profiles_path=profile_config ) raise RuntimeError( "No GoodData credentials provided. Please export required ENVVARS " - "(GDC_HOSTNAME, GDC_AUTH_TOKEN) or provide path to GD profile config." + f"({HOSTNAME_ENV_VAR_NAME}, {TOKEN_ENV_VAR_NAME}) or provide path to GD profile config." 
) diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index 37d863d..0000000 --- a/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# (C) 2025 GoodData Corporation diff --git a/tests/test_backup_restore_config.py b/tests/test_backup_restore_config.py new file mode 100644 index 0000000..08d2c29 --- /dev/null +++ b/tests/test_backup_restore_config.py @@ -0,0 +1,31 @@ +# (C) 2025 GoodData Corporation + +import os +import sys + +sys.path.insert( + 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../scripts")) +) + + +from pathlib import Path + +from gooddata_pipelines import LocalStorageConfig, S3StorageConfig, StorageType +from utils.backup_restore_config import load_config_from_yaml # type: ignore + + +def test_local_storage_config(): + config = load_config_from_yaml(Path("tests/data/backup/test_local_conf.yaml")) + + assert isinstance(config.storage, LocalStorageConfig) + assert config.storage_type == StorageType.LOCAL + assert config.storage.backup_path == "local_backups" + + +def test_s3_storage_config(): + config = load_config_from_yaml(Path("tests/data/backup/test_conf.yaml")) + + assert isinstance(config.storage, S3StorageConfig) + assert config.storage_type == StorageType.S3 + assert config.storage.backup_path == "some/s3/backup/path/org_id/" + assert config.storage.bucket == "some-s3-bucket" From 4250a0cbd3559c8e16de38f41c0ff5a19f1f7d31 Mon Sep 17 00:00:00 2001 From: janmatzek Date: Mon, 13 Oct 2025 16:38:27 +0200 Subject: [PATCH 4/5] refactor: remove redundant tests --- .github/workflows/python.yaml | 63 +-- tests/conftest.py | 33 -- tests/data/__init__.py | 0 tests/data/backup/__init__.py | 0 .../analytical_dashboard_extensions/.gitkeep | 0 .../analytical_dashboards/.gitkeep | 0 .../dashboard_plugins/.gitkeep | 0 .../analytics_model/filter_contexts/.gitkeep | 0 .../wsid1/analytics_model/metrics/.gitkeep | 0 .../visualization_objects/.gitkeep | 0 .../workspaces/wsid1/ldm/datasets/test.yaml | 1 - 
.../ldm/date_instances/testinstance.yaml | 1 - .../analytical_dashboard_extensions/.gitkeep | 0 .../analytical_dashboards/id.yaml | 1 - .../dashboard_plugins/.gitkeep | 0 .../analytics_model/filter_contexts/id.yaml | 0 .../wsid2/analytics_model/metrics/.gitkeep | 0 .../visualization_objects/test.yaml | 1 - .../workspaces/wsid2/ldm/datasets/.gitkeep | 0 .../wsid2/ldm/date_instances/.gitkeep | 0 .../analytical_dashboard_extensions/.gitkeep | 0 .../analytical_dashboards/.gitkeep | 0 .../dashboard_plugins/.gitkeep | 0 .../analytics_model/filter_contexts/.gitkeep | 0 .../wsid3/analytics_model/metrics/.gitkeep | 0 .../visualization_objects/.gitkeep | 0 .../workspaces/wsid3/ldm/datasets/.gitkeep | 0 .../wsid3/ldm/date_instances/.gitkeep | 0 .../wsid3/user_data_filters/.gitignore | 0 tests/data/permission_mgmt/input.csv | 24 - tests/data/restore/test.csv | 5 - tests/data/restore/test_conf.yaml | 4 - .../ldm/datasets/some_dataset_id.yaml | 15 - .../user_data_filters/datafilter2.yaml | 6 - .../user_data_filters/datafilter4.yaml | 6 - tests/data/user_group_mgmt/input.csv | 5 - tests/data/user_mgmt/input.csv | 8 - tests/fake_aws_creds.sh | 3 - tests/test_backup.py | 439 --------------- tests/test_restore.py | 523 ------------------ tests/test_user_group_mgmt.py | 97 ---- tests/test_user_mgmt.py | 25 - tests/test_utils/__init__.py | 0 .../test_utils/test_backup_utils/__init__.py | 0 .../test_backup_utils/test_input_loader.py | 205 ------- 45 files changed, 30 insertions(+), 1435 deletions(-) delete mode 100644 tests/conftest.py delete mode 100644 tests/data/__init__.py delete mode 100644 tests/data/backup/__init__.py delete mode 100644 tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/analytical_dashboard_extensions/.gitkeep delete mode 100644 
tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/analytical_dashboards/.gitkeep delete mode 100644 tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/dashboard_plugins/.gitkeep delete mode 100644 tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/filter_contexts/.gitkeep delete mode 100644 tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/metrics/.gitkeep delete mode 100644 tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/visualization_objects/.gitkeep delete mode 100644 tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/ldm/datasets/test.yaml delete mode 100644 tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/ldm/date_instances/testinstance.yaml delete mode 100644 tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/analytical_dashboard_extensions/.gitkeep delete mode 100644 tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/analytical_dashboards/id.yaml delete mode 100644 tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/dashboard_plugins/.gitkeep delete mode 100644 tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/filter_contexts/id.yaml delete mode 100644 
tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/metrics/.gitkeep delete mode 100644 tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/visualization_objects/test.yaml delete mode 100644 tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/ldm/datasets/.gitkeep delete mode 100644 tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/ldm/date_instances/.gitkeep delete mode 100644 tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/analytical_dashboard_extensions/.gitkeep delete mode 100644 tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/analytical_dashboards/.gitkeep delete mode 100644 tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/dashboard_plugins/.gitkeep delete mode 100644 tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/filter_contexts/.gitkeep delete mode 100644 tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/metrics/.gitkeep delete mode 100644 tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/visualization_objects/.gitkeep delete mode 100644 tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/ldm/datasets/.gitkeep delete mode 100644 
tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/ldm/date_instances/.gitkeep delete mode 100644 tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/user_data_filters/.gitignore delete mode 100644 tests/data/permission_mgmt/input.csv delete mode 100644 tests/data/restore/test.csv delete mode 100644 tests/data/restore/test_conf.yaml delete mode 100644 tests/data/restore/test_ldm_load/ldm/datasets/some_dataset_id.yaml delete mode 100644 tests/data/restore/test_user_data_filters/user_data_filters/datafilter2.yaml delete mode 100644 tests/data/restore/test_user_data_filters/user_data_filters/datafilter4.yaml delete mode 100644 tests/data/user_group_mgmt/input.csv delete mode 100644 tests/data/user_mgmt/input.csv delete mode 100644 tests/fake_aws_creds.sh delete mode 100644 tests/test_backup.py delete mode 100644 tests/test_restore.py delete mode 100644 tests/test_user_group_mgmt.py delete mode 100644 tests/test_user_mgmt.py delete mode 100644 tests/test_utils/__init__.py delete mode 100644 tests/test_utils/test_backup_utils/__init__.py delete mode 100644 tests/test_utils/test_backup_utils/test_input_loader.py diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index 21288d0..8fccb85 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -11,44 +11,41 @@ jobs: runs-on: ubuntu-latest steps: - - name: checkout - uses: actions/checkout@v3 + - name: checkout + uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.11' + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.11" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install tox + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox - - name: Run lint - run: tox 
-e lint - - - name: Run type - run: tox -e type + - name: Run lint + run: tox -e lint + + - name: Run type + run: tox -e type test-python: runs-on: ubuntu-latest steps: - - name: checkout - uses: actions/checkout@v3 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.11' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install tox - - - name: Set up fake AWS credentials - run: sh tests/fake_aws_creds.sh - - - name: Run tests - run: tox -e 3.11 + - name: checkout + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox + + - name: Run tests + run: tox -e 3.11 diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index 40ab1c0..0000000 --- a/tests/conftest.py +++ /dev/null @@ -1,33 +0,0 @@ -# (C) 2025 GoodData Corporation -import os -from typing import Generator - -import boto3 -import pytest - - -@pytest.fixture(scope="session", autouse=True) -def aws_credentials() -> Generator[None, None, None]: - """ - Set dummy AWS credentials for the entire test session. - This is an autouse fixture, so it runs automatically. - """ - os.environ["AWS_ACCESS_KEY_ID"] = "testing" - os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" - os.environ["AWS_SECURITY_TOKEN"] = "testing" - os.environ["AWS_SESSION_TOKEN"] = "testing" - os.environ["AWS_DEFAULT_REGION"] = "us-east-1" - yield - - -@pytest.fixture -def mock_boto_session(mocker): - """ - Mocks boto3.Session to prevent it from using a real AWS profile. - It will return a default Session object, which then uses the - dummy credentials set by the conftest.py fixture. - """ - # We patch boto3.Session and make it return a new, default session object. 
- # This new object will not have the `profile_name` and will fall back - # to using the environment variables we set in conftest. - mocker.patch("boto3.Session", return_value=boto3.Session()) diff --git a/tests/data/__init__.py b/tests/data/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/__init__.py b/tests/data/backup/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/analytical_dashboard_extensions/.gitkeep b/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/analytical_dashboard_extensions/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/analytical_dashboards/.gitkeep b/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/analytical_dashboards/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/dashboard_plugins/.gitkeep b/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/dashboard_plugins/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/filter_contexts/.gitkeep b/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/filter_contexts/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git 
a/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/metrics/.gitkeep b/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/metrics/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/visualization_objects/.gitkeep b/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/visualization_objects/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/ldm/datasets/test.yaml b/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/ldm/datasets/test.yaml deleted file mode 100644 index 56a6051..0000000 --- a/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/ldm/datasets/test.yaml +++ /dev/null @@ -1 +0,0 @@ -1 \ No newline at end of file diff --git a/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/ldm/date_instances/testinstance.yaml b/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/ldm/date_instances/testinstance.yaml deleted file mode 100644 index d8263ee..0000000 --- a/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/ldm/date_instances/testinstance.yaml +++ /dev/null @@ -1 +0,0 @@ -2 \ No newline at end of file diff --git 
a/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/analytical_dashboard_extensions/.gitkeep b/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/analytical_dashboard_extensions/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/analytical_dashboards/id.yaml b/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/analytical_dashboards/id.yaml deleted file mode 100644 index 9b26e9b..0000000 --- a/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/analytical_dashboards/id.yaml +++ /dev/null @@ -1 +0,0 @@ -+ \ No newline at end of file diff --git a/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/dashboard_plugins/.gitkeep b/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/dashboard_plugins/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/filter_contexts/id.yaml b/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/filter_contexts/id.yaml deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/metrics/.gitkeep 
b/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/metrics/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/visualization_objects/test.yaml b/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/visualization_objects/test.yaml deleted file mode 100644 index 05a5366..0000000 --- a/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/visualization_objects/test.yaml +++ /dev/null @@ -1 +0,0 @@ -id \ No newline at end of file diff --git a/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/ldm/datasets/.gitkeep b/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/ldm/datasets/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/ldm/date_instances/.gitkeep b/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/ldm/date_instances/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/analytical_dashboard_extensions/.gitkeep b/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/analytical_dashboard_extensions/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git 
a/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/analytical_dashboards/.gitkeep b/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/analytical_dashboards/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/dashboard_plugins/.gitkeep b/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/dashboard_plugins/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/filter_contexts/.gitkeep b/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/filter_contexts/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/metrics/.gitkeep b/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/metrics/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/visualization_objects/.gitkeep b/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/visualization_objects/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git 
a/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/ldm/datasets/.gitkeep b/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/ldm/datasets/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/ldm/date_instances/.gitkeep b/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/ldm/date_instances/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/user_data_filters/.gitignore b/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/user_data_filters/.gitignore deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/permission_mgmt/input.csv b/tests/data/permission_mgmt/input.csv deleted file mode 100644 index 2c0b9b5..0000000 --- a/tests/data/permission_mgmt/input.csv +++ /dev/null @@ -1,24 +0,0 @@ -user_id,ug_id,ws_id,ws_permissions,is_active -bad,row, -user_1,,ws_id_1,ANALYZE,True -user_1,,ws_id_1,VIEW,True -user_1,,ws_id_1,MANAGE,False -user_2,,ws_id_1,ANALYZE,True -user_2,,ws_id_1,MANAGE,True -,ug_1,ws_id_1,ANALYZE,True -,ug_1,ws_id_1,VIEW,True -,ug_1,ws_id_1,MANAGE,False -,ug_2,ws_id_1,ANALYZE,True -,ug_2,ws_id_1,MANAGE,True -user,ug,ws_id_1,ANALYZE,True -user_1,,ws_id_3,ANALYZE,True -,,ws_id_1,ANALYZE,True -user_1,,ws_id_1,ANALYZE, -user_1,,ws_id_1,,True -user_1,,,ANALYZE,True -user_1,,ws_id_2,MANAGE,True -user_2,,ws_id_2,ANALYZE,False -user_3,,ws_id_2,MANAGE,True -,ug_1,ws_id_2,MANAGE,True -,ug_2,ws_id_2,ANALYZE,False -,ug_3,ws_id_2,MANAGE,True diff --git a/tests/data/restore/test.csv b/tests/data/restore/test.csv deleted file mode 
100644 index 1d68747..0000000 --- a/tests/data/restore/test.csv +++ /dev/null @@ -1,5 +0,0 @@ -workspace_id,path -thiswsdoesnotexist,thiswsdoesnotexist/blabla -ws_id_1,ws_id_1/bla -ws_id_2,ws_id_2/bla -ws_id_3,ws_id_2/bla diff --git a/tests/data/restore/test_conf.yaml b/tests/data/restore/test_conf.yaml deleted file mode 100644 index 9a4d005..0000000 --- a/tests/data/restore/test_conf.yaml +++ /dev/null @@ -1,4 +0,0 @@ -storage_type: s3 -storage: - bucket: some-s3-bucket - backup_path: some/s3/backup/path/org_id/ diff --git a/tests/data/restore/test_ldm_load/ldm/datasets/some_dataset_id.yaml b/tests/data/restore/test_ldm_load/ldm/datasets/some_dataset_id.yaml deleted file mode 100644 index 4032fa0..0000000 --- a/tests/data/restore/test_ldm_load/ldm/datasets/some_dataset_id.yaml +++ /dev/null @@ -1,15 +0,0 @@ -grain: - - id: some_dataset_id.some_id - type: attribute -id: some_dataset_id -references: - - identifier: - id: some_ref_id - type: dataset - multivalue: false - sourceColumns: - - country_id -title: Test Dataset -workspaceDataFilterColumns: - - dataType: STRING - name: wdf__country diff --git a/tests/data/restore/test_user_data_filters/user_data_filters/datafilter2.yaml b/tests/data/restore/test_user_data_filters/user_data_filters/datafilter2.yaml deleted file mode 100644 index be622d6..0000000 --- a/tests/data/restore/test_user_data_filters/user_data_filters/datafilter2.yaml +++ /dev/null @@ -1,6 +0,0 @@ -id: datafilter2 -maql: '{label/campaign_channels.category} = "1"' -title: Status filter -user: - id: 5c867a8a-12af-45bf-8d85-c7d16bedebd1 - type: user diff --git a/tests/data/restore/test_user_data_filters/user_data_filters/datafilter4.yaml b/tests/data/restore/test_user_data_filters/user_data_filters/datafilter4.yaml deleted file mode 100644 index ca63315..0000000 --- a/tests/data/restore/test_user_data_filters/user_data_filters/datafilter4.yaml +++ /dev/null @@ -1,6 +0,0 @@ -id: datafilter4 -maql: '{label/campaign_channels.category} = "1"' -title: Status 
filter -user: - id: 5c867a8a-12af-45bf-8d85-c7d16bedebd1 - type: user diff --git a/tests/data/user_group_mgmt/input.csv b/tests/data/user_group_mgmt/input.csv deleted file mode 100644 index 6d91442..0000000 --- a/tests/data/user_group_mgmt/input.csv +++ /dev/null @@ -1,5 +0,0 @@ -user_group_id,user_group_name,parent_user_groups,is_active -ug_1,Admins,,True -ug_2,Developers,ug_1,True -ug_3,Testers,ug_1|ug_2,True -ug_4,TemporaryAccess,ug_2,False diff --git a/tests/data/user_mgmt/input.csv b/tests/data/user_mgmt/input.csv deleted file mode 100644 index b1372aa..0000000 --- a/tests/data/user_mgmt/input.csv +++ /dev/null @@ -1,8 +0,0 @@ -user_id, firstname, lastname, email, auth_id, user_groups, is_active -jozef.mrkva,jozef,mrkva,jozef.mrkva@test.com,auth_id_1,,True -bartolomej.brokolica,,,,auth_id_2,,False -peter.pertzlen,peter,pertzlen,peter.pertzlen@test.com,auth_id_3,ug_1|ug_2,True -zoltan.zeler,zoltan,zeler,zoltan.zeler@test.com,auth_id_4,ug_1,True -kristian.kalerab,kristian,kalerab,,auth_id_5,,True -richard.cvikla,,,richard.cvikla@test.com,auth_id_6,ug_1|ug_2,False -adam.avokado,,,,auth_id_7,,False diff --git a/tests/fake_aws_creds.sh b/tests/fake_aws_creds.sh deleted file mode 100644 index 997d417..0000000 --- a/tests/fake_aws_creds.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash -# This creates fake ~/.aws/credentials file for boto3 mocking -mkdir ~/.aws && touch ~/.aws/credentials && echo "[default]\naws_access_key_id = test\naws_secret_access_key = test" > ~/.aws/credentials diff --git a/tests/test_backup.py b/tests/test_backup.py deleted file mode 100644 index 00744cb..0000000 --- a/tests/test_backup.py +++ /dev/null @@ -1,439 +0,0 @@ -# (C) 2025 GoodData Corporation -import os -import sys - -sys.path.insert( - 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../scripts")) -) -import argparse -import os -import shutil -import tempfile -import threading -from pathlib import Path -from typing import Any -from unittest import mock - -import 
boto3 -import pytest -from gooddata_sdk.sdk import GoodDataSdk -from moto import mock_aws - -import scripts.backup as backup -from scripts.utils.models.batch import Size - -MOCK_DL_TARGET = Path("overlays.zip") -TEST_CONF_PATH = "tests/data/backup/test_conf.yaml" -TEST_LOCAL_CONF_PATH = "tests/data/backup/test_local_conf.yaml" - -S3_BACKUP_PATH = "some/s3/backup/path/org_id/" -S3_BUCKET = "some-s3-bucket" - -MOCK_SDK = GoodDataSdk.create("host", "token") - - -class MockGdWorkspace: - def __init__(self, id: str) -> None: - self.id = id - - -class MockResponse: - def __init__(self, status_code, json_response=None, text: str = ""): - self.status_code = status_code - self.json_response = json_response if json_response else {} - self.text = text - - def json(self): - return self.json_response - - -def mock_requests_get(**kwargs): - body: dict[str, list[Any]] = {"userDataFilters": []} - return MockResponse(200, body) - - -def mock_requests(): - requests = mock.Mock() - requests.get.side_effect = mock_requests_get - return requests - - -@pytest.fixture() -def s3(aws_credentials): - with mock_aws(): - yield boto3.resource("s3") - - -@pytest.fixture(scope="function") -def s3_bucket(s3): - s3.create_bucket(Bucket=S3_BUCKET) - yield s3.Bucket(S3_BUCKET) - - -@pytest.fixture(scope="function") -def create_backups_in_bucket(s3_bucket): - def create_backups(ws_ids: list[str], is_e2e: bool = False, suffix: str = "bla"): - # If used within e2e test, add some suffix to path - # in order to simulate a more realistic scenario - path_suffix = f"/{suffix}" if is_e2e else "" - - for ws_id in ws_ids: - s3_bucket.put_object( - Bucket=S3_BUCKET, Key=f"{S3_BACKUP_PATH}{ws_id}{path_suffix}/" - ) - s3_bucket.put_object( - Bucket=S3_BUCKET, - Key=f"{S3_BACKUP_PATH}{ws_id}{path_suffix}/gooddata_layouts.zip", - ) - - return create_backups - - -def assert_not_called_with(target, *args, **kwargs): - try: - target.assert_called_with(*args, **kwargs) - except AssertionError: - return - formatted_call 
= target._format_mock_call_signature(args, kwargs) - raise AssertionError(f"Expected {formatted_call} to not have been called.") - - -@mock.patch.dict(os.environ, {"GDC_HOSTNAME": "hostname", "GDC_AUTH_TOKEN": "token"}) -@mock.patch("gooddata_sdk.GoodDataSdk.create_from_profile") -@mock.patch("gooddata_sdk.GoodDataSdk.create") -def test_gd_client_env(client_create_env, client_create_profile): - backup.create_client(argparse.Namespace()) - client_create_env.assert_called_once_with("hostname", "token") - client_create_profile.assert_not_called() - - -@mock.patch.dict(os.environ, {}, clear=True) -@mock.patch("scripts.backup.create_api_client_from_profile") -@mock.patch("gooddata_sdk.GoodDataSdk.create_from_profile") -@mock.patch("gooddata_sdk.GoodDataSdk.create") -@mock.patch("os.path.exists") -def test_gd_client_profile( - path_exists, - client_create_env, - client_create_profile, - create_api_client_from_profile, -): - path_exists.return_value = True - args = argparse.Namespace( - profile_config="gdc_profile_config_path", - profile="gdc_profile", - ) - backup.create_client(args) - client_create_env.assert_not_called() - client_create_profile.assert_called_once_with( - "gdc_profile", "gdc_profile_config_path" - ) - create_api_client_from_profile.assert_called_once_with( - "gdc_profile", "gdc_profile_config_path" - ) - - -@mock.patch.dict(os.environ, {}, clear=True) -def test_gd_client_no_creds_raises_error(): - args = argparse.Namespace( - profile_config="", - profile="", - ) - with pytest.raises(RuntimeError): - backup.create_client(args) - - -# Incorrect ws_csv and conf args throw error -@pytest.mark.parametrize("conf_path", ["", "configuration_nonexist.yaml"]) -@pytest.mark.parametrize("csv_path", ["", "input_nonexist.csv"]) -def test_wrong_wscsv_conf_raise_error(csv_path, conf_path): - args = argparse.Namespace( - ws_csv=csv_path, conf=conf_path, input_type="list-of-workspaces", verbose=False - ) - with pytest.raises(RuntimeError): - backup.validate_args(args) - 
- -def test_wrong_input_type_raises_error(): - args = argparse.Namespace( - ws_csv="input.csv", conf="conf.yaml", input_type="wrong-input-type" - ) - with pytest.raises(RuntimeError): - backup.validate_args(args) - - -def test_get_s3_storage(): - s3_storage_type = backup.get_storage("s3") - assert s3_storage_type == backup.S3Storage - - -def test_get_local_storage(): - local_storage_type = backup.get_storage("local") - assert local_storage_type == backup.LocalStorage - - -def test_get_unknown_storage_raises_error(): - with pytest.raises(RuntimeError): - backup.get_storage("unknown_storage") - - -# Test that zipping gooddata_layouts folder works -def test_archive_gooddata_layouts_to_zip(): - with tempfile.TemporaryDirectory() as tmpdir: - shutil.copytree( - Path("tests/data/backup/test_exports/services/"), Path(tmpdir + "/services") - ) - backup.archive_gooddata_layouts_to_zip(str(Path(tmpdir, "services"))) - - zip_exists = os.path.isfile( - Path( - tmpdir, "services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts.zip" - ) - ) - gooddata_layouts_dir_exists = os.path.isdir( - Path(tmpdir, "services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts") - ) - - assert gooddata_layouts_dir_exists is False - assert zip_exists - - zip_exists = os.path.isfile( - Path( - tmpdir, "services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts.zip" - ) - ) - gooddata_layouts_dir_exists = os.path.isdir( - Path(tmpdir, "services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts") - ) - - assert gooddata_layouts_dir_exists is False - assert zip_exists - - zip_exists = os.path.isfile( - Path( - tmpdir, "services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts.zip" - ) - ) - gooddata_layouts_dir_exists = os.path.isdir( - Path(tmpdir, "services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts") - ) - - assert gooddata_layouts_dir_exists is False - assert zip_exists - - -@mock.patch("utils.gd_api.requests", new_callable=mock_requests) -def 
test_get_user_data_filters_normal_response(requests): - api = backup.GDApi("some.host.com", "token") - - response = backup.get_user_data_filters( - api, - "workspace", - ) - assert response == {"userDataFilters": []} - - -def test_store_user_data_filters(): - user_data_filters = { - "userDataFilters": [ - { - "id": "datafilter2", - "maql": '{label/campaign_channels.category} = "1"', - "title": "Status filter", - "user": {"id": "5c867a8a-12af-45bf-8d85-c7d16bedebd1", "type": "user"}, - }, - { - "id": "datafilter4", - "maql": '{label/campaign_channels.category} = "1"', - "title": "Status filter", - "user": {"id": "5c867a8a-12af-45bf-8d85-c7d16bedebd1", "type": "user"}, - }, - ] - } - user_data_filter_folderlocation = Path( - "tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/user_data_filters" - ) - backup.store_user_data_filters( - user_data_filters, - Path( - "tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5" - ), - "services", - "wsid1", - ) - user_data_filter_folder = os.path.isdir(Path(user_data_filter_folderlocation)) - user_data_filter2 = os.path.isfile( - Path(f"{user_data_filter_folderlocation}/datafilter2.yaml") - ) - user_data_filter4 = os.path.isfile( - Path(f"{user_data_filter_folderlocation}/datafilter4.yaml") - ) - assert user_data_filter_folder - assert user_data_filter2 - assert user_data_filter4 - - count = 0 - for path in os.listdir(user_data_filter_folderlocation): - if os.path.isfile(os.path.join(user_data_filter_folderlocation, path)): - count += 1 - - assert count == 2 - - shutil.rmtree( - "tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/user_data_filters" - ) - - -def test_local_storage_export(): - with tempfile.TemporaryDirectory() as tmpdir: - org_store_location = Path(tmpdir + "/services") - shutil.copytree( - Path("tests/data/backup/test_exports/services/"), org_store_location - ) - - 
backup.LocalStorage.export( - self=backup.LocalStorage(backup.BackupRestoreConfig(TEST_LOCAL_CONF_PATH)), - folder=tmpdir, - org_id="services", - export_folder="tests/data/local_export", - ) - local_export_folder_exist = os.path.isdir( - Path( - "tests/data/local_export/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model" - ) - ) - local_export_folder2_exist = os.path.isdir( - Path( - "tests/data/local_export/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/ldm" - ) - ) - - local_export_folder3_exist = os.path.isdir( - Path( - "tests/data/local_export/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/user_data_filters" - ) - ) - - local_export_file_exist = os.path.isfile( - Path( - "tests/data/local_export/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/analytical_dashboards/id.yaml" - ) - ) - assert local_export_folder_exist - assert local_export_folder2_exist - assert local_export_folder3_exist - assert local_export_file_exist - shutil.rmtree("tests/data/local_export") - - -def test_file_upload(s3, s3_bucket, mock_boto_session): - conf = backup.BackupRestoreConfig(TEST_CONF_PATH) - s3storage = backup.get_storage("s3")(conf) - s3storage.export("tests/data/backup/test_exports", "services") - s3.Object( - S3_BUCKET, - "some/s3/backup/path/org_id/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/filter_contexts/id.yaml", - ).load() - - -def test_split_to_batches(): - workspaces = ["ws1", "ws2", "ws3", "ws4", "ws5"] - batch_size = Size(size=2) - expected_batches = [ - backup.BackupBatch(["ws1", "ws2"]), - backup.BackupBatch(["ws3", "ws4"]), - backup.BackupBatch(["ws5"]), - ] - - result = backup.split_to_batches(workspaces, batch_size) - - for i, batch in enumerate(result): - assert isinstance(batch, backup.BackupBatch) - assert batch.list_of_ids == 
expected_batches[i].list_of_ids - - -@mock.patch("scripts.backup.archive_gooddata_layouts_to_zip") -@mock.patch("scripts.backup.get_workspace_export") -def test_process_batch_success(get_workspace_export_mock, archive_zip_mock): - sdk = mock.Mock() - api = mock.Mock() - org_id = "org" - storage = mock.Mock() - batch = backup.BackupBatch(["ws1", "ws2"]) - - backup.process_batch( - sdk=sdk, - api=api, - org_id=org_id, - storage=storage, - batch=batch, - stop_event=threading.Event(), - ) - - get_workspace_export_mock.assert_called_once() - archive_zip_mock.assert_called_once() - storage.export.assert_called_once() - - -@mock.patch("scripts.backup.logger") -@mock.patch("scripts.backup.archive_gooddata_layouts_to_zip") -@mock.patch("scripts.backup.get_workspace_export") -def test_process_batch_retries_on_exception( - get_workspace_export_mock, _archive_zip_mock, logger_mock -): - sdk = mock.Mock() - api = mock.Mock() - org_id = "org" - storage = mock.Mock() - batch = backup.BackupBatch(["ws1"]) - # Raise exception on first call, succeed on second - call_count = {"count": 0} - - def fail_once(*args, **kwargs): - if call_count["count"] == 0: - call_count["count"] += 1 - raise Exception("fail") - return None - - get_workspace_export_mock.side_effect = fail_once - - backup.process_batch( - sdk=sdk, - api=api, - org_id=org_id, - storage=storage, - batch=batch, - stop_event=threading.Event(), - ) - - assert get_workspace_export_mock.call_count == 2 - assert logger_mock.info.call_args_list[0][0][0].startswith( - "Exception encountered while processing a batch. 
Retrying" - ) - storage.export.assert_called_once() - - -@mock.patch("scripts.backup.logger") -@mock.patch("scripts.backup.archive_gooddata_layouts_to_zip") -@mock.patch("scripts.backup.get_workspace_export") -def test_process_batch_raises_after_max_retries( - get_workspace_export_mock, _archive_zip_mock, logger_mock -): - sdk = mock.Mock() - api = mock.Mock() - org_id = "org" - storage = mock.Mock() - batch = backup.BackupBatch(["ws1"]) - get_workspace_export_mock.side_effect = Exception("fail") - - with pytest.raises(Exception, match="fail"): - backup.process_batch( - sdk=sdk, - api=api, - org_id=org_id, - storage=storage, - batch=batch, - stop_event=threading.Event(), - retry_count=backup.BackupSettings.MAX_RETRIES, - ) - logger_mock.error.assert_called() diff --git a/tests/test_restore.py b/tests/test_restore.py deleted file mode 100644 index 4e39a04..0000000 --- a/tests/test_restore.py +++ /dev/null @@ -1,523 +0,0 @@ -# (C) 2025 GoodData Corporation -import os -import sys - -sys.path.insert( - 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../scripts")) -) - -import argparse -import json -import logging -import tempfile -from pathlib import Path -from unittest import mock - -import boto3 -import pytest -from gooddata_sdk.sdk import GoodDataSdk -from moto import mock_aws - -from scripts import restore - -LOGGER_NAME = "restore.py" -MOCK_DL_TARGET = Path("overlays.zip") -TEST_CONF_PATH = "tests/data/restore/test_conf.yaml" -TEST_CSV_PATH = "tests/data/restore/test.csv" -TEST_LDM_PATH = Path("tests/data/restore/test_ldm_load") -TEST_UDF_PATH = Path("tests/data/restore/test_user_data_filters/") - -S3_BACKUP_PATH = "some/s3/backup/path/org_id/" -S3_BUCKET = "some-s3-bucket" - - -class MockGdWorkspace: - def __init__(self, id: str) -> None: - self.id = id - - -@pytest.fixture -def s3(aws_credentials: None): - """Yields a mocked S3 client that can be used for testing.""" - with mock_aws(): - yield boto3.resource("s3", region_name="us-east-1") - - 
-@pytest.fixture() -def s3_bucket(s3): - s3.create_bucket(Bucket=S3_BUCKET) - yield s3.Bucket(S3_BUCKET) - - -@mock_aws -@pytest.fixture() -def create_backups_in_bucket(s3_bucket): - def create_backups(ws_ids: list[str], is_e2e: bool = False, suffix: str = "bla"): - # If used within e2e test, add some suffix to path - # in order to simulate a more realistic scenario - path_suffix = f"/{suffix}" if is_e2e else "" - - for ws_id in ws_ids: - s3_bucket.put_object(Key=f"{S3_BACKUP_PATH}{ws_id}{path_suffix}/") - s3_bucket.put_object( - Key=f"{S3_BACKUP_PATH}{ws_id}{path_suffix}/gooddata_layouts.zip", - ) - - return create_backups - - -def assert_not_called_with(target, *args, **kwargs): - try: - target.assert_called_with(*args, **kwargs) - except AssertionError: - return - formatted_call = target._format_mock_call_signature(args, kwargs) - raise AssertionError(f"Expected {formatted_call} to not have been called.") - - -@mock.patch.dict(os.environ, {"GDC_HOSTNAME": "hostname", "GDC_AUTH_TOKEN": "token"}) -@mock.patch("gooddata_sdk.GoodDataSdk.create_from_profile") -@mock.patch("gooddata_sdk.GoodDataSdk.create") -def test_gd_client_env(client_create_env, client_create_profile): - restore.create_client(argparse.Namespace()) - client_create_env.assert_called_once_with("hostname", "token") - client_create_profile.assert_not_called() - - -@mock.patch.dict(os.environ, {}, clear=True) -@mock.patch("gooddata_sdk.GoodDataSdk.create_from_profile") -@mock.patch("scripts.restore.create_api_client_from_profile") -@mock.patch("gooddata_sdk.GoodDataSdk.create") -@mock.patch("os.path.exists") -def test_gd_client_profile( - path_exists, - client_create_env, - client_create_profile, - create_api_client_from_profile, -): - path_exists.return_value = True - args = argparse.Namespace( - profile_config="gdc_profile_config_path", - profile="gdc_profile", - ) - restore.create_client(args) - client_create_env.assert_not_called() - client_create_profile.assert_called_once_with( - "gdc_profile", 
"gdc_profile_config_path" - ) - create_api_client_from_profile.assert_called_once_with( - "gdc_profile", "gdc_profile_config_path" - ) - - -@mock.patch.dict(os.environ, {}, clear=True) -def test_gd_client_no_creds_raises_error(): - args = argparse.Namespace( - profile_config="", - profile="", - ) - with pytest.raises(RuntimeError): - restore.create_client(args) - - -@pytest.mark.parametrize("csv_path", ["", "bad/path"]) -@mock.patch("scripts.restore.create_client") -def test_bad_csv_path_raises_error(_, csv_path): - args = argparse.Namespace(ws_csv=csv_path, verbose=False) - with pytest.raises(RuntimeError): - restore.validate_args(args) - - -@pytest.mark.parametrize("conf_path", ["", "bad/path"]) -@mock.patch("scripts.restore.create_client") -def test_bad_conf_path_raises_error(_, conf_path): - args = argparse.Namespace(conf=conf_path, ws_csv=".", verbose=False) - with pytest.raises(RuntimeError): - restore.validate_args(args) - - -def test_get_s3_storage(): - s3_storage_type = restore.get_storage("s3") - assert s3_storage_type == restore.S3Storage - - -def test_get_unknown_storage_raises_error(): - with pytest.raises(RuntimeError): - restore.get_storage("unknown_storage") - - -@mock_aws -def test_s3_storage(mock_boto_session, create_backups_in_bucket): - create_backups_in_bucket(["ws_id"]) - conf = restore.BackupRestoreConfig(TEST_CONF_PATH) - storage = restore.S3Storage(conf) - - with tempfile.TemporaryDirectory() as tempdir: - target_path = Path(tempdir, MOCK_DL_TARGET) - storage.get_ws_declaration("ws_id/", target_path) - - -def test_s3_storage_no_target_only_dir(mock_boto_session, s3_bucket): - s3_bucket.put_object(Bucket=S3_BUCKET, Key=f"{S3_BACKUP_PATH}/ws_id/") - conf = restore.BackupRestoreConfig(TEST_CONF_PATH) - storage = restore.S3Storage(conf) - with pytest.raises(restore.BackupRestoreError): - storage.get_ws_declaration("ws_id/", MOCK_DL_TARGET) - - -def test_s3_storage_no_target(mock_boto_session, s3_bucket): - s3_bucket.put_object(Bucket=S3_BUCKET, 
Key=f"{S3_BACKUP_PATH}/bla/") - conf = restore.BackupRestoreConfig(TEST_CONF_PATH) - storage = restore.S3Storage(conf) - with pytest.raises(restore.BackupRestoreError): - storage.get_ws_declaration("bad_target/", MOCK_DL_TARGET) - - -def test_init_ldm_with_ws_data_filter_cols(): - # Regression test - this doesn't work for sdk 1.3 and lesser - sdk = GoodDataSdk.create("", "") - model = sdk.catalog_workspace_content.load_ldm_from_disk(TEST_LDM_PATH) - assert model.ldm is not None - assert len(model.ldm.datasets) == 1 - - -def test_validate_targets(caplog): - sdk = mock.Mock() - sdk.catalog_workspace.list_workspaces.return_value = [ - MockGdWorkspace(id=f"ws_id_{i}") for i in range(4) - ] - - ws_paths = {f"ws_id_{i}": "" for i in range(2, 6)} - - restore.validate_targets(sdk, ws_paths) - - assert len(caplog.record_tuples) == 1 - logger, level, msg = caplog.record_tuples[0] - assert logger == LOGGER_NAME - assert level == logging.ERROR - for i in range(4, 6): - assert f"ws_id_{i}" in msg - - -def test_bad_s3_bucket_raises_error(s3): - conf = restore.BackupRestoreConfig(TEST_CONF_PATH) - with pytest.raises(RuntimeError): - restore.S3Storage(conf) - - -def test_bad_s3_path_raises_error(s3_bucket): - conf = restore.BackupRestoreConfig(TEST_CONF_PATH) - with pytest.raises(RuntimeError): - restore.S3Storage(conf) - - -@mock.patch("scripts.restore.zipfile.ZipFile") -def test_restore_empty_ws(zipfile): - def create_empty_ws(tempdir): - os.mkdir(tempdir / "gooddata_layouts") - os.mkdir(tempdir / "gooddata_layouts" / "ldm") - os.mkdir(tempdir / "gooddata_layouts" / "analytics_model") - os.mkdir(tempdir / "gooddata_layouts" / "user_data_filters") - os.mkdir(tempdir / "gooddata_layouts" / "filter_views") - os.mkdir(tempdir / "gooddata_layouts" / "automations") - - zipfile.return_value.__enter__.return_value.extractall = create_empty_ws - sdk = mock.Mock() - sdk.catalog_workspace.get_declarative_automations.return_value = [] - api = mock.Mock() - storage = mock.Mock() - ws_paths = 
{"ws_id": "some/ws/path"} - - worker = restore.RestoreWorker(sdk, api, storage, ws_paths) - worker.incremental_restore() - - sdk.catalog_workspace_content.put_declarative_ldm.assert_called_once_with( - "ws_id", mock.ANY - ) - sdk.catalog_workspace_content.put_declarative_analytics_model.assert_called_once_with( - "ws_id", mock.ANY - ) - - -@mock.patch("scripts.restore.zipfile.ZipFile") -def test_invalid_ws_on_disk_skipped(zipfile): - def create_invalid_ws(tempdir): - # Missing AM directory - os.mkdir(tempdir / "gooddata_layouts") - os.mkdir(tempdir / "gooddata_layouts" / "ldm") - - zipfile.return_value.__enter__.return_value.extractall = create_invalid_ws - - sdk = mock.Mock() - api = mock.Mock() - storage = mock.Mock() - ws_paths = {"ws_id": "some/ws/path"} - - worker = restore.RestoreWorker(sdk, api, storage, ws_paths) - worker.incremental_restore() - - sdk.catalog_workspace_content.put_declarative_ldm.assert_not_called() - sdk.catalog_workspace_content.put_declarative_analytics_model.assert_not_called() - - -# e2e tests - - -def prepare_catalog_mocks(): - ldm = mock.Mock() - ldm.to_dict.return_value = {"ldm": {"foo": "bar"}} - ws_catalog = mock.MagicMock() - return ldm, ws_catalog - - -# No longer need create_backups_in_bucket or mock_boto_session for this specific test -@mock.patch("scripts.restore.RestoreWorker._load_user_data_filters") -@mock.patch("scripts.restore.zipfile") -def test_incremental_restore(zipfile_mock, _, mocker): - """ - Tests the RestoreWorker's incremental logic by providing a mock S3Storage object. - """ - # Prepare sdk-related mocks (this is your existing setup) - ldm, ws_catalog = prepare_catalog_mocks() - ws_catalog.load_ldm_from_disk.return_value = ldm - sdk = mock.Mock() - sdk.catalog_workspace.get_declarative_automations.return_value = [] - api = mock.Mock() - sdk.catalog_workspace_content = ws_catalog - - # 1. 
Create a mock of an S3Storage INSTANCE using mocker - # This mock will behave like an S3Storage object, with the same methods. - mock_storage = mocker.create_autospec(restore.S3Storage, instance=True) - - # 2. Define the behavior of the mock. The worker calls `get_ws_declaration`. - # We can just tell it to do nothing, because the next step in the worker - # (`_extract_zip_archive`) uses `zipfile.ZipFile`, which is already - # mocked by the decorator on this test. - mock_storage.get_ws_declaration.return_value = None - - # 3. Inject the mock dependency into the worker - ws_paths = {"ws_id_1": "ws_id_1", "ws_id_2": "ws_id_2"} - worker = restore.RestoreWorker(sdk, api, mock_storage, ws_paths) - - # 4. Run the code under test - with mock.patch("scripts.restore.RestoreWorker._check_workspace_is_valid"): - worker.incremental_restore() - - # 5. Assert that the worker interacted with our mock as expected - # This ensures the worker is calling the storage logic correctly. - mock_storage.get_ws_declaration.assert_has_calls( - [ - mock.call("ws_id_1", mock.ANY), - mock.call("ws_id_2", mock.ANY), - ] - ) - - ws_catalog.assert_has_calls( - [ - mock.call.load_ldm_from_disk(mock.ANY), - mock.call.load_analytics_model_from_disk(mock.ANY), - ] - ) - ws_catalog.assert_has_calls( - [ - mock.call.put_declarative_ldm("ws_id_1", ldm), - mock.call.put_declarative_analytics_model("ws_id_1", mock.ANY), - ] - ) - ws_catalog.assert_has_calls( - [ - mock.call.put_declarative_ldm("ws_id_2", ldm), - mock.call.put_declarative_analytics_model("ws_id_2", mock.ANY), - ] - ) - - -@mock.patch("scripts.restore.RestoreWorker._load_user_data_filters") -@mock.patch("scripts.restore.zipfile") -def test_incremental_restore_different_ws_source( - _, _load_user_data_filters, create_backups_in_bucket, mock_boto_session -): - # Prepare sdk-related mocks - ldm, ws_catalog = prepare_catalog_mocks() - ws_catalog.load_ldm_from_disk.return_value = ldm - sdk = mock.Mock() - sdk.catalog_workspace_content = 
ws_catalog - sdk.catalog_workspace.get_declarative_automations.return_value = [] - - api = mock.Mock() - - create_backups_in_bucket(["ws_id_1"]) - - conf = restore.BackupRestoreConfig(TEST_CONF_PATH) - storage = restore.S3Storage(conf) - - # 1 -> 1; 2 -> 1 - ws_paths = {"ws_id_1": "ws_id_1", "ws_id_2": "ws_id_1"} - - worker = restore.RestoreWorker(sdk, api, storage, ws_paths) - with mock.patch("scripts.restore.RestoreWorker._check_workspace_is_valid") as _: - worker.incremental_restore() - - ws_catalog.assert_has_calls( - [ - mock.call.load_ldm_from_disk(mock.ANY), - mock.call.load_analytics_model_from_disk(mock.ANY), - ] - ) - ws_catalog.assert_has_calls( - [ - mock.call.put_declarative_ldm("ws_id_1", ldm), - mock.call.put_declarative_analytics_model("ws_id_1", mock.ANY), - ] - ) - ws_catalog.assert_has_calls( - [ - mock.call.put_declarative_ldm("ws_id_2", ldm), - mock.call.put_declarative_analytics_model("ws_id_2", mock.ANY), - ] - ) - - -@mock.patch("scripts.restore.RestoreWorker._load_user_data_filters") -@mock.patch("scripts.restore.zipfile") -def test_incremental_restore_one_succeeds_one_fails( - _, _load_user_data_filters, create_backups_in_bucket, mock_boto_session -): - # Prepare sdk-related mocks - ldm, ws_catalog = prepare_catalog_mocks() - # One load succeeds, one fails... 
- ws_catalog.load_ldm_from_disk.side_effect = [ldm, Exception()] - sdk = mock.Mock() - sdk.catalog_workspace_content = ws_catalog - sdk.catalog_workspace.get_declarative_automations.return_value = [] - - api = mock.Mock() - - create_backups_in_bucket(["ws_id_1", "ws_id_2"]) - - conf = restore.BackupRestoreConfig(TEST_CONF_PATH) - storage = restore.S3Storage(conf) - - ws_paths = {"ws_id_1": "ws_id_1", "ws_id_2": "ws_id_1"} - - worker = restore.RestoreWorker(sdk, api, storage, ws_paths) - with mock.patch("scripts.restore.RestoreWorker._check_workspace_is_valid") as _: - worker.incremental_restore() - - ws_catalog.assert_has_calls( - [ - mock.call.put_declarative_ldm("ws_id_1", ldm), - mock.call.put_declarative_analytics_model("ws_id_1", mock.ANY), - ] - ) - # Ensure that despite the failure on ws_id_2 restore, we don't put anything - assert_not_called_with(ws_catalog.put_declarative_ldm, "ws_id_2", mock.ANY) - assert_not_called_with( - ws_catalog.put_declarative_analytics_model, "ws_id_2", mock.ANY - ) - - -def test_load_user_data_filters(): - sdk = mock.Mock() - api = mock.Mock() - storage = mock.Mock() - ws_paths = mock.Mock() - - worker = restore.RestoreWorker(sdk, api, storage, ws_paths) - user_data_filters = worker._load_user_data_filters(TEST_UDF_PATH) - user_data_filters_expected = { - "userDataFilters": [ - { - "id": "datafilter2", - "maql": '{label/campaign_channels.category} = "1"', - "title": "Status filter", - "user": {"id": "5c867a8a-12af-45bf-8d85-c7d16bedebd1", "type": "user"}, - }, - { - "id": "datafilter4", - "maql": '{label/campaign_channels.category} = "1"', - "title": "Status filter", - "user": {"id": "5c867a8a-12af-45bf-8d85-c7d16bedebd1", "type": "user"}, - }, - ] - } - - # Convert both the expected and actual filter lists to sorted lists of their JSON string representations - sorted_user_data_filters = sorted( - json.dumps(d, sort_keys=True) for d in user_data_filters["userDataFilters"] - ) - sorted_user_data_filters_expected = sorted( - 
json.dumps(d, sort_keys=True) - for d in user_data_filters_expected["userDataFilters"] - ) - - assert sorted_user_data_filters == sorted_user_data_filters_expected - - -@mock.patch("scripts.restore.create_client") -@mock.patch("scripts.restore.RestoreWorker._load_user_data_filters") -@mock.patch("scripts.restore.zipfile") -@mock.patch("scripts.restore.create_parser") -def test_e2e( - create_parser, - zipfile_mock, - _load_user_data_filters, - create_client, - create_backups_in_bucket, - mock_boto_session, -): - conf_path = TEST_CONF_PATH - csv_path = TEST_CSV_PATH - args = argparse.Namespace(conf=conf_path, ws_csv=csv_path, verbose=False) - - # Prepare sdk-related mocks - ldm, ws_catalog = prepare_catalog_mocks() - # On load_ldm_from_disk: Success, Fail, Success - ws_catalog.load_ldm_from_disk.side_effect = [ldm, Exception(), ldm] - sdk = mock.Mock() - sdk.catalog_workspace.get_declarative_automations.return_value = [] - sdk.catalog_workspace_content = ws_catalog - sdk.catalog_workspace.list_workspaces.return_value = [ - MockGdWorkspace(id=f"ws_id_{i}") for i in range(1, 4) - ] - - api = mock.Mock() - - create_client.return_value = sdk, api - - create_backups_in_bucket(["ws_id_1", "ws_id_2"], is_e2e=True) - - # Mock parser and its parse_args to return our args namespace - parser_mock = mock.Mock() - parser_mock.parse_args.return_value = args - create_parser.return_value = parser_mock - - with mock.patch("scripts.restore.RestoreWorker._check_workspace_is_valid") as _: - restore.restore() - - assert_not_called_with( - ws_catalog.put_declarative_ldm, "thiswsdoesnotexist", mock.ANY - ) - assert_not_called_with( - ws_catalog.put_declarative_analytics_model, "thiswsdoesnotexist", mock.ANY - ) - - ws_catalog.assert_has_calls( - [ - mock.call.put_declarative_ldm("ws_id_1", ldm), - mock.call.put_declarative_analytics_model("ws_id_1", mock.ANY), - ] - ) - - # Ensure that in case of the failure on ws_id_2 restore, we don't PUT anything - 
assert_not_called_with(ws_catalog.put_declarative_ldm, "ws_id_2", mock.ANY) - assert_not_called_with( - ws_catalog.put_declarative_analytics_model, "ws_id_2", mock.ANY - ) - - ws_catalog.assert_has_calls( - [ - mock.call.put_declarative_ldm("ws_id_3", ldm), - mock.call.put_declarative_analytics_model("ws_id_3", mock.ANY), - ] - ) diff --git a/tests/test_user_group_mgmt.py b/tests/test_user_group_mgmt.py deleted file mode 100644 index 55aebc7..0000000 --- a/tests/test_user_group_mgmt.py +++ /dev/null @@ -1,97 +0,0 @@ -# BSD License -# -# Copyright (c) 2024, GoodData Corporation. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without modification, are permitted, provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. -# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import os -import sys - -sys.path.insert( - 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../scripts")) -) - -import argparse - -import pytest -from gooddata_pipelines import UserGroupIncrementalLoad - -from scripts import user_group_mgmt -from scripts.user_group_mgmt import read_users_groups_from_csv - -TEST_CSV_PATH = "tests/data/user_group_mgmt/input.csv" - - -def test_conflicting_delimiters_raises_error(monkeypatch): - monkeypatch.setattr("os.path.exists", lambda path: True) - args = argparse.Namespace( - user_group_csv="", delimiter=",", ug_delimiter=",", quotechar='"' - ) - with pytest.raises(RuntimeError): - user_group_mgmt.validate_args(args) - - -@pytest.fixture -def mock_read_csv_file_to_dict(mocker): - """ - Fixture to mock read_csv_file_to_dict in scripts.user_group_mgmt. 
- """ - - def _mock(return_value): - return mocker.patch( - "scripts.user_group_mgmt.read_csv_file_to_dict", - return_value=return_value, - ) - - return _mock - - -@pytest.mark.parametrize( - "dict_row", - [ - { - "user_group_id": "ug_1", - "user_group_name": "Admins", - "parent_user_groups": "ug_2|ug_3", - "is_active": "True", - }, - { - "user_group_id": "ug_2", - "user_group_name": "Developers", - "parent_user_groups": "", - "is_active": "True", - }, - { - "user_group_id": "ug_3", - "user_group_name": "", - "parent_user_groups": "ug1", - "is_active": "False", - }, - ], -) -def test_from_csv_row_standard(mock_read_csv_file_to_dict, dict_row): - mock_read_csv_file_to_dict([dict_row]) - result = read_users_groups_from_csv( - argparse.Namespace( - user_group_csv="", delimiter=",", ug_delimiter="|", quotechar='"' - ) - ) - expected = [ - UserGroupIncrementalLoad( - user_group_id=dict_row["user_group_id"], - user_group_name=dict_row["user_group_name"] or dict_row["user_group_id"], - parent_user_groups=( - dict_row["parent_user_groups"].split("|") - if dict_row["parent_user_groups"] - else [] - ), - is_active=dict_row["is_active"], - ) - ] - assert result == expected diff --git a/tests/test_user_mgmt.py b/tests/test_user_mgmt.py deleted file mode 100644 index 3637ed7..0000000 --- a/tests/test_user_mgmt.py +++ /dev/null @@ -1,25 +0,0 @@ -# (C) 2025 GoodData Corporation -import os -import sys - -sys.path.insert( - 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../scripts")) -) - - -import argparse -from unittest import mock - -import pytest - -from scripts import user_mgmt - - -@mock.patch("os.path.exists") -def test_conflicting_delimiters_raises_error(path_exists): - path_exists.return_value = True - args = argparse.Namespace( - conf="", user_csv="", delimiter=",", ug_delimiter=",", quotechar='"' - ) - with pytest.raises(RuntimeError): - user_mgmt.validate_args(args) diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py deleted file mode 
100644 index e69de29..0000000 diff --git a/tests/test_utils/test_backup_utils/__init__.py b/tests/test_utils/test_backup_utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_utils/test_backup_utils/test_input_loader.py b/tests/test_utils/test_backup_utils/test_input_loader.py deleted file mode 100644 index bc7465f..0000000 --- a/tests/test_utils/test_backup_utils/test_input_loader.py +++ /dev/null @@ -1,205 +0,0 @@ -# (C) 2025 GoodData Corporation -import os -import sys - -sys.path.insert( - 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../scripts")) -) - -import tempfile - -import pytest - -from scripts.utils.backup_utils.input_loader import InputLoader -from scripts.utils.gd_api import GDApi -from scripts.utils.models.batch import Size -from scripts.utils.models.workspace_response import ( - Hierarchy, - Links, - Meta, - Page, - Workspace, - WorkspaceResponse, -) - -MOCK_GDP_API = GDApi( - host="https://fake.host/", - api_token="fake_token", -) - - -@pytest.fixture -def input_loader(): - loader = InputLoader(MOCK_GDP_API, page_size=Size(size=2)) - loader.hierarchy_endpoint = "/fake/hierarchy?filter=parent.id=={parent_id}" - loader.all_workspaces_endpoint = "/fake/all" - return loader - - -def test_process_data_extracts_children_and_subparents(): - ws1 = Workspace(id="ws1", meta=Meta(hierarchy=Hierarchy(children_count=2))) - ws2 = Workspace(id="ws2", meta=Meta(hierarchy=Hierarchy(children_count=0))) - ws3 = Workspace(id="ws3", meta=None) - - result = InputLoader.process_data([ws1, ws2, ws3]) - assert result.workspace_ids == ["ws1", "ws2", "ws3"] - assert result.sub_parents == ["ws1"] - - -def test_log_paging_progress_logs_info(mocker): - response = WorkspaceResponse( - data=[], - meta=Meta( - page=Page(size=5, total_elements=25, number=1, total_pages=5), - hierarchy=None, - ), - links=Links(self="self", next="next"), - ) - - mock_logger = mocker.patch("scripts.utils.backup_utils.input_loader.logger.info") - 
InputLoader.log_paging_progress(response) - mock_logger.assert_called_once - - -def test_log_paging_progress_no_page(mocker): - response = WorkspaceResponse( - data=[], - meta=Meta(page=None, hierarchy=None), - links=Links(self="self", next="next"), - ) - - mock_logger = mocker.patch("scripts.utils.backup_utils.input_loader.logger.info") - InputLoader.log_paging_progress(response) - assert mock_logger.call_count == 0 - - -def test_paginate_calls_fetch_page_and_process_data(input_loader, monkeypatch): - ws1 = Workspace(id="ws1", meta=Meta(hierarchy=Hierarchy(children_count=1))) - ws2 = Workspace(id="ws2", meta=Meta(hierarchy=Hierarchy(children_count=0))) - links1 = Links(self="self", next="next_url") - links2 = Links(self="self", next=None) - resp1 = WorkspaceResponse( - data=[ws1], meta=Meta(hierarchy=None, page=None), links=links1 - ) - resp2 = WorkspaceResponse( - data=[ws2], meta=Meta(hierarchy=None, page=None), links=links2 - ) - - fetch_page_calls = [] - - def fetch_page_side_effect(url): - fetch_page_calls.append(url) - return resp1 if len(fetch_page_calls) == 1 else resp2 - - input_loader.fetch_page = fetch_page_side_effect - - process_data_calls = [] - - def process_data_side_effect(data): - process_data_calls.append(data) - if len(process_data_calls) == 1: - return InputLoader._ProcessDataOutput(["ws1"], ["ws1"]) - else: - return InputLoader._ProcessDataOutput(["ws2"], []) - - monkeypatch.setattr( - InputLoader, "process_data", staticmethod(process_data_side_effect) - ) - monkeypatch.setattr( - InputLoader, "log_paging_progress", staticmethod(lambda resp: None) - ) - - result = input_loader._paginate("first_url") - assert len(result) == 2 - assert result[0].workspace_ids == ["ws1"] - assert result[1].workspace_ids == ["ws2"] - assert len(fetch_page_calls) == 2 - assert len(process_data_calls) == 2 - - -def test_get_hierarchy_recurses(input_loader, monkeypatch): - def fake_paginate(url): - if "p1" in url: - return [InputLoader._ProcessDataOutput(["c1"], 
["c1"])] - if "c1" in url: - return [InputLoader._ProcessDataOutput(["c2"], [])] - return [] - - input_loader._paginate = fake_paginate - monkeypatch.setattr( - "scripts.utils.backup_utils.input_loader.logger", - type("Logger", (), {"info": lambda self, msg: None})(), - ) - result = input_loader.get_hierarchy("p1") - assert set(result) == {"c1", "c2"} - - -def test_get_workspaces_to_backup_empty_org(input_loader, monkeypatch, caplog): - monkeypatch.setattr( - input_loader, - "_paginate", - lambda _: [], - ) - with caplog.at_level("WARNING"): - input_loader.get_ids_to_backup( - "entire-organization", - "some-csv-file.csv", - ) - assert "No workspaces found in the organization." in caplog.text - - -def test_get_workspaces_to_backup_wrong_input_type(input_loader): - with pytest.raises(RuntimeError, match="Invalid input type provided."): - input_loader.get_ids_to_backup( - "invalid-input-type", - "some-csv-file.csv", - ) - - -def test_read_csv_input_empty_file(input_loader) -> None: - """Test with an empty CSV file.""" - with tempfile.NamedTemporaryFile() as temp_csv: - path_to_csv = temp_csv.name - with pytest.raises(ValueError, match="No content found in the CSV file."): - input_loader.read_csv_input_for_backup(path_to_csv) - - -def test_read_csv_input_only_header(input_loader) -> None: - """Test with a CSV file that contains only the header.""" - with tempfile.NamedTemporaryFile() as temp_csv: - temp_csv.write(b"header1\n") - temp_csv.flush() - temp_csv.seek(0) - path_to_csv = temp_csv.name - with pytest.raises(ValueError, match="No workspaces found in the CSV file."): - input_loader.read_csv_input_for_backup(path_to_csv) - - -def test_read_csv_input_valid(input_loader) -> None: - """Test with a valid CSV file.""" - with tempfile.NamedTemporaryFile(delete=False) as temp_csv: - temp_csv.write(b"header1\n") - temp_csv.write(b"workspace1\n") - temp_csv.write(b"workspace2\n") - temp_csv.flush() - temp_csv.seek(0) - path_to_csv = temp_csv.name - result = 
input_loader.read_csv_input_for_backup(path_to_csv) - assert result == ["workspace1", "workspace2"] - os.remove(path_to_csv) - - -def test_read_csv_input_too_many_columns(input_loader) -> None: - """Test with a CSV file that contains too many columns.""" - with tempfile.NamedTemporaryFile(delete=False) as temp_csv: - temp_csv.write(b"header1,header2\n") - temp_csv.write(b"workspace1,extra_column\n") - temp_csv.flush() - temp_csv.seek(0) - path_to_csv = temp_csv.name - with pytest.raises( - ValueError, - match="Input file contains more than one column. Please check the input and try again.", - ): - input_loader.read_csv_input_for_backup(path_to_csv) - os.remove(path_to_csv) From b5e1e7871f0067b8caccc54e1d2e996f6583d5d8 Mon Sep 17 00:00:00 2001 From: janmatzek Date: Tue, 14 Oct 2025 09:46:06 +0200 Subject: [PATCH 5/5] docs: fix readme, move examples to docs/examples --- .gitignore | 3 +- README.md | 106 ++++----- docs/BACKUP.md | 130 ----------- docs/CUSTOM_FIELDS.md | 128 ----------- docs/RESTORE.md | 82 ------- .../backup_and_restore/backup_input.csv | 0 .../configuration_local.yaml | 0 .../backup_and_restore/configuration_s3.yaml | 0 .../backup_and_restore/restore_input.csv | 4 + docs/examples/user_mgmt/input.csv | 2 +- .../SETUPAUTHENTICATION.md} | 17 +- docs/reference/COMMON_ARGUMENTS.md | 117 ++++++++++ docs/reference/STORAGE_CONFIG.md | 212 ++++++++++++++++++ docs/tools/BACKUP.md | 136 +++++++++++ docs/tools/CUSTOM_FIELDS.md | 123 ++++++++++ docs/{ => tools}/PERMISSION_MGMT.md | 23 +- docs/tools/RESTORE.md | 93 ++++++++ docs/{ => tools}/USER_DATA_FILTER_MGMT.md | 22 +- docs/{ => tools}/USER_GROUP_MGMT.md | 24 +- docs/{ => tools}/USER_MGMT.md | 30 ++- docs/{ => tools}/WORKSPACE_MGMT.md | 26 ++- 21 files changed, 826 insertions(+), 452 deletions(-) delete mode 100644 docs/BACKUP.md delete mode 100644 docs/CUSTOM_FIELDS.md delete mode 100644 docs/RESTORE.md rename input.csv => docs/examples/backup_and_restore/backup_input.csv (100%) rename 
configuration_local.yaml => docs/examples/backup_and_restore/configuration_local.yaml (100%) rename configuration_s3.yaml => docs/examples/backup_and_restore/configuration_s3.yaml (100%) create mode 100644 docs/examples/backup_and_restore/restore_input.csv rename docs/{SETUPATUHENTICATION.md => guides/SETUPAUTHENTICATION.md} (96%) create mode 100644 docs/reference/COMMON_ARGUMENTS.md create mode 100644 docs/reference/STORAGE_CONFIG.md create mode 100644 docs/tools/BACKUP.md create mode 100644 docs/tools/CUSTOM_FIELDS.md rename docs/{ => tools}/PERMISSION_MGMT.md (66%) create mode 100644 docs/tools/RESTORE.md rename docs/{ => tools}/USER_DATA_FILTER_MGMT.md (62%) rename docs/{ => tools}/USER_GROUP_MGMT.md (60%) rename docs/{ => tools}/USER_MGMT.md (62%) rename docs/{ => tools}/WORKSPACE_MGMT.md (63%) diff --git a/.gitignore b/.gitignore index b6074e3..77c484e 100644 --- a/.gitignore +++ b/.gitignore @@ -33,4 +33,5 @@ cover/ **.env # Include sample and test config files -!tests/data/backup/test_local_conf.yaml \ No newline at end of file +!tests/data/** +!docs/** \ No newline at end of file diff --git a/README.md b/README.md index c54b551..4475dc9 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,36 @@ # gooddata-productivity-tools + This repository contains tools that help with GoodData Cloud/CN workspace management, user and user group management, and backup/restore of workspaces. This section of the documentation contains information on how to set up the environment and relevant authentication files. At the end of the Tools section, there is more specific documentation for each tool. The steps mentioned here are shared between them. ## Requirements + Python 3.11 Depending on your environment, the statements can start either as + ```sh -pip +pip pip3 ``` + ```sh python python3 ``` -please use the one that works for you and refers to python 3.10+. + +please use the one that works for you and refers to Python 3.11. 
The version can be checked by running + ```sh python -V ``` ## Install -In order to install tooling requirements to the target environment, run the following: + +In order to install tooling requirements to the target environment, run the following: ```sh pip install -r requirements.txt @@ -31,75 +38,42 @@ pip install -r requirements.txt ## Authentication -Overall, the scripts developed within the repository follow the credential/authentication provisioning conventions of the GoodData and any used storage provider (e.g. AWS). - -The following section describes what credentials need to be set up, where to find them, and what format they should follow. If you need help with how to edit files in your user home folder (~), you can also refer to [step by step authentication setup guide](docs/SETUPATUHENTICATION.md). - - -### GoodData -When authenticating against GoodData, you can either export the required credentials using environment variables, or provide a GoodData profiles file. - -For example, you can export the environment variables like so: - -```sh -export GDC_AUTH_TOKEN="some_auth_token" -export GDC_HOSTNAME="https://host.name.cloud.gooddata.com/" -``` - -or you can choose to provide a GoodData `profiles.yaml` file of the following format: - -```yaml -default: - host: https://host.name.cloud.gooddata.com/ - token: some_auth_token - -customer: - host: https://customer.hostname.cloud.gooddata.com/ - token: other_auth_token -``` - -By default, a tool attempts to locate a GoodData profile file at `~/.gooddata/profiles.yaml`, but you can also choose to provide a custom path like so: +The scripts follow standard credential/authentication conventions for GoodData and storage providers (e.g., AWS). -```sh -python scripts/restore.py -p path/to/profiles.yaml -``` - -You can define multiple GoodData profiles in a single profiles file. By default, the `default` profile is used, but you can choose different one to use. 
For example, if you want to tell a tool to use the `customer` profile defined in the example `profiles.yaml` above, you can do so like this: - -```sh -python scripts/restore.py -p path/to/profiles.yaml --profile customer -``` - -In case of providing both ways of authentication to a tool, the environment variables takes precedence and the profiles config is ignored. +### Quick Overview -### AWS +**GoodData Authentication:** -When authenticating against AWS, the [conventions made by the boto3 library](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) are followed. +- Environment variables: `GDC_AUTH_TOKEN` and `GDC_HOSTNAME` +- Profile file: `~/.gooddata/profiles.yaml` (supports multiple profiles) +- Tools use the `default` profile by default +- Environment variables take precedence over profile files -From the tool user perspective that means following the points 3. to 8. from the [Configuring Credentials section](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#configuring-credentials). +**AWS Authentication:** -One example of how you can supply AWS credentials for tools, is by defining one or more AWS profiles in `~/.aws/credentials` file. +- Follows [boto3 credential resolution](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) +- Common method: AWS credentials file at `~/.aws/credentials` +- See tool-specific docs for profile selection -``` -[default] -aws_access_key_id = some_key_id -aws_secret_access_key = some_access_key - -[services] -aws_access_key_id = other_key_id -aws_secret_access_key = other_access_key -``` +### Detailed Setup -If you want to specify the specific AWS credentials profile to be used, see the tool-specific documentation. +For step-by-step instructions on creating and configuring authentication files, including file formats and examples, see the [Authentication Setup Guide](docs/guides/SETUPAUTHENTICATION.md). 
## Tools -- [Backup workspace](docs/BACKUP.md) -- [Restore workspace](docs/RESTORE.md) -- [Workspace permission management](docs/PERMISSION_MGMT.md) -- [User management](docs/USER_MGMT.md) -- [User group management](docs/USER_GROUP_MGMT.md) +- [Backup workspace](docs/tools/BACKUP.md) +- [Restore workspace](docs/tools/RESTORE.md) +- [Workspace management](docs/tools/WORKSPACE_MGMT.md) +- [Workspace permission management](docs/tools/PERMISSION_MGMT.md) +- [User management](docs/tools/USER_MGMT.md) +- [User group management](docs/tools/USER_GROUP_MGMT.md) +- [User data filter management](docs/tools/USER_DATA_FILTER_MGMT.md) +- [Custom fields management](docs/tools/CUSTOM_FIELDS.md) +### Reference Documentation + +- [Common Arguments Reference](docs/reference/COMMON_ARGUMENTS.md) - Detailed explanations of shared CLI arguments +- [Storage Configuration Reference](docs/reference/STORAGE_CONFIG.md) - Guide to configuring S3 and local storage backends ## Known MacOS issue SSL: CERTIFICATE_VERIFY_FAILED @@ -118,6 +92,7 @@ To mitigate, please install your SSL certificates in HD -> Applications -> Pytho This section is aimed towards developers wanting to adjust / test the code. If you are regular user you can ignore following parts. ### Setup + To set up local development environment do the following: 1. (optional) Set up a local python virtual environment: @@ -133,8 +108,8 @@ To set up local development environment do the following: pip install -r requirements.txt -r requirements-test.txt -r requirements-dev.txt ``` - ### Style checking, linting, and typing + The codebase (both, scripts and tests) is style, lint, and type checked when the CI/CD pipeline runs. Linting and style-checking is done with help of `black` and `ruff`. 
@@ -147,19 +122,20 @@ To run either of the mentioned tools locally, just call the tool with a target d ``` - For example, in order to check the typing in the scripts, call the following from the repository's root directory: +For example, in order to check the typing in the scripts, call the following from the repository's root directory: ```sh mypy scripts ``` To check that the code styling will pass pre-merge checks when creating a pull request, run: + ```sh tox -e lint ``` - ### Testing + The tooling test suite makes use of some third party tools, such as `pytest`, `tox`, and `moto`. To run the test suite locally, ensure you have test and script requirements installed (see Setup step above) change working directory to repository's root and then call: @@ -168,7 +144,6 @@ To run the test suite locally, ensure you have test and script requirements inst pytest . ``` - ### Tox The pre-merge checks run via GitHub actions use tox to verify the code style and test cases. @@ -182,4 +157,5 @@ tox ``` ## Contributing + If you want to contribute to the project, please read the [contributing guide](CONTRIBUTING.md). diff --git a/docs/BACKUP.md b/docs/BACKUP.md deleted file mode 100644 index 1fffe10..0000000 --- a/docs/BACKUP.md +++ /dev/null @@ -1,130 +0,0 @@ -# GD Export workspace definition - -Tool which exports / creates a backup of one or more workspaces - their logical data model (LDM), analytical model (AM), user data filters (UDF), filter views and automations. Backups are stored either locally or can be uploaded to S3 bucket. - -## Usage -The tool requires the following arguments on input: -- `ws_csv` - a path to a csv file defining target workspace IDs to restore to, and a backup source paths -- `conf` - a path to a configuration file containing information required for accessing the backup source storage -- `--input-type`, `-t` - specification of how the input file from the first argument is handled. This argument is optional. 
- -### Input type - -The `input-type` argument is optional and has three options: - -`list-of-workspaces` is the default option. The data in the input file is treated as an exhaustive list of workspaces to back up. This is also what will happen if the argument is omitted entirely - -Use `list-of-parents` when the input file contains a list of parent workspaces and you wish to back up the entire hierarchy. For each workspace ID in the input, all of its direct and indirect children are included in the backup, as well as the parent workspaces themselves. - -If the `entire-organization` option is selected, the script will back up all the workspaces within the organization. If this option is selected, you do not need to provide the `ws_csv` argument as it will be ignored. If a `ws_csv` value is provided, the script will log a warning message, but will proceed to back up the organization. - -### Usage examples - -If you want to back up a list of specific workspaces, run: - -```sh -python scripts/backup.py ws_csv conf -``` - -Where ws_csv refers to input csv and conf to configuration file in yaml format. 
This would be equivalent to running: - -```sh -python scripts/backup.py ws_csv conf -t list-of-workspaces -``` - -For example, if you have csv file named "example_input.csv" in the folder from which you are executing the python command and configuration file named "example_conf.yaml" in subfolder relative to the folder you are executing the script from named "subfolder", the execution could look like this: - -```sh -python scripts/backup.py example_input.csv subfolder/example_conf.yaml -``` - -If you want to back up a specific hierarchy under a parent workspace, prepare the list of parents, store it in a csv file (named for example `parents.csv`) and run: - -```sh -python scripts/backup.py parents.csv conf.yaml -t list-of-parents -``` - -If you want to back up all the workspaces in the organization, run: - -```sh -python scripts/backup.py conf.yaml -t entire-organization -``` - -Note that in this case, you do not need to provide the `ws_csv` argument as no list is required. - -To show the help for using arguments, call: -```sh -python scripts/backup.py -h -``` - -There are two more optional arguments for setting up GoodData profiles. -By default, a tool attempts to locate a GoodData profile file at ~/.gooddata/profiles.yaml, but you can also choose to provide a custom path like so: -- `-p` - path/to/profiles.yaml -- `--profile` - name of GoodData profile to be used - -```sh -python scripts/backup.py input.csv conf.yaml -p path/to/profiles.yaml --profile customer -``` - -## Configuration file (conf) -The configuration files let you define which type of storage the export tool will save the backups to, and any additional storage-specific information that might be required. Currently AWS S3 and Local storage are supported. - -If you run the script with `list-of-parents` or `entire-organization`, the script will fetch the IDs of workspaces to process (either hierarchies under the specified parents or all the workspaces within the organization) in batches. 
As a default, the batch size is set to `100`, but you can parametrize it by setting the `api_page_size` parameter in your configuration yaml. - -The `batch_size` is an optional parameter which accepts integer value and determines how many workspaces will be processed before saving the backups to the selected storage. As a default, the batch size is set to `100`. If you want to set a different batch size, you can specify so in the configuration yaml. - -The configuration file has the following format: -```yaml -storage_type: some_storage -storage: - arg1: foo - arg2: bar -api_page_size: 1000 -batch_size: 20 -``` - - -### AWS S3 - -You can define the configuration file for S3 storage like so: - -```yaml -storage_type: s3 -storage: - bucket: some_bucket - backup_path: some/path/to/backups/ - profile: services - aws_access_key_id: your-access-key-id - aws_secret_access_key: your-secret-access-key - aws_default_region: us-east-1 -``` -Here, the meaning of different `storage` fields is as follows: -- bucket - S3 storage bucket containing the backups -- backup_path - absolute path within the S3 bucket which leads to the root directory where the backups should be saved -- profile (optional) - AWS profile to be used -- aws_access_key_id (optional) - AWS access key ID to be used -- aws_secret_access_key (optional) - AWS secret access key to be used -- aws_default_region (optional) - AWS region to be used - -### Local Storage - -```yaml -storage_type: local -storage: -``` - -In this case exports are saved to ./local_backups/ folder relative to where the script is executed from. The amount of backups already present in this folder might affect the performace of the script. - -## Input CSV file (ws_csv) -The input CSV file defines the targets and sources for backup restores (imports). 
- -Following format of the csv is expected: - -| workspace_id | -|--------------| -| ws_id_1 | -| ws_id_2 | -| ws_id_3 | - -Here, each `workspace_id` is the workspace ID of the workspace to perform the export on. -If the defined workspace does not exit in the target organization, this information will be present as ERROR log. If something fails, please read over all ERROR log messages for information where the issue lies. diff --git a/docs/CUSTOM_FIELDS.md b/docs/CUSTOM_FIELDS.md deleted file mode 100644 index 90baf17..0000000 --- a/docs/CUSTOM_FIELDS.md +++ /dev/null @@ -1,128 +0,0 @@ -# Custom Field Management - -The `scripts/custom_fields.py` script will allow you to extend the Logical Data Model (LDM) of a child workspace by adding extra datasets which are not present in the parent workspaces' LDM. - -## Environment setup - -The script relies on `GDC_HOSTNAME` and `GDC_AUTH_TOKEN` environment variables. You can export these by running this in your terminal: - -```shell -export GDC_HOSTNAME=https://your-gooddata-cloud-domain.com -export GDC_AUTH_TOKEN=your-personal-access-token -``` - -## Input files - -The script works with input from two CSV files. These files should contain (a) custom dataset definitions and (b) custom field definitions. - -The custom dataset defines the dataset entity, i.e., the box you would see in the GoodData Cloud UI. The custom fields, on the other hand, define the individual fields in that dataset. You can imagine it as first defining a table and then its columns. - -Multiple datasets and fields can be defined in the files. However, the files need to be consistent with each other - you cannot define fields form datasets that are not defined in the datasets file. - -### Custom dataset definitions - -The first contains the definitions of the datasets you want to create. 
It should have following structure: - -| workspace_id | dataset_id | dataset_name | dataset_datasource_id | dataset_source_table | dataset_source_sql | parent_dataset_reference | parent_dataset_reference_attribute_id | dataset_reference_source_column | dataset_reference_source_column_data_type | wdf_id | wdf_column_name | -| -------------------- | ----------------- | -------------------- | --------------------- | -------------------- | ------------------ | ------------------------ | ------------------------------------- | ------------------------------- | ----------------------------------------- | ------ | --------------- | -| child_workspace_id_1 | custom_dataset_id | Custom Dataset Title | datasource_id | dataset_source_table | | parent_dataset_id | parent_dataset.reference_field | custom_dataset.reference_field | column data type | wdf_id | wdf_column_name | - -#### Validity constraints - -- The `dataset_source_table` and `dataset_source_sql` are mutually exclusive. Only one of those should be filled in, the other should be null (empty value). In case both values are present, the script will throw an error. - -- `workspace_id` + `dataset_id` combination must be unique across all dataset definitions. - -#### JSON representation - -For readability, here is the data structure in JSON format with comments. However, note that the script will only work with CSV files! 
- -```json -{ - "workspace_id": "child_workspace_id_1", // child workspace id - "dataset_id": "custom_dataset_id", // custom dataset id - "dataset_name": "Custom Dataset Title", // custom dataset name - "dataset_datasource_id": "datasource_id", // data source id -> in the UI, you see it when you go to "manage files" - "dataset_source_table": "dataset_source_table", // the name of the table in the physical data model - "dataset_source_sql": null, // SQL query defining the dataset - "parent_dataset_reference": "products", // ID of the parent dataset to which the custom one will be connected - "parent_dataset_reference_attribute_id": "products.product_id", // parent dataset column name used fot the "join" - "dataset_reference_source_column": "product_id", // custom dataset column name used for the "join" - "dataset_reference_source_column_data_type": "STRING", // column data type* - "wdf_id": "x__client_id", // workspace data filter id - "wdf_column_name": "client_id" // name of the column used for filtering -} -``` - -\* possible values are listed in `ColumnDataType` enum in [models](../scripts/custom_fields/models/custom_data_object.py) - -### Custom fields definition - -The individual files of the custom dataset are defined thusly: - -| workspace_id | dataset_id | cf_id | cf_name | cf_type | cf_source_column | cf_source_column_data_type | -| -------------------- | ----------------- | --------------- | ----------------- | --------- | -------------------------- | -------------------------- | -| child_workspace_id_1 | custom_dataset_id | custom_field_id | Custom Field Name | attribute | custom_field_source_column | INT | - -#### Validity constraints - -The custom field definitions must comply with these criteria: - -- **attributes** and **facts**: unique `workspace_id` + `cf_id` combinations -- **dates**: unique `dataset_id` and `cf_id` combinations - -#### JSON representation - -Again, here is a JSON definition with comments for readability: - -```json -{ - 
"workspace_id": "child_workspace_id_1", // child workspace ID - "dataset_id": "custom_dataset_id", // custom dataset ID - "cf_id": "custom_field_id", // custom field ID - "cf_name": "Custom Field Name", // custom field name - "cf_type": "attribute", // GoodData type of the field* - "cf_source_column": "custom_field_source_column", // name of the column in the physical data model - "cf_source_column_data_type": "INT" // data type of the field* -} -``` - -\* Supported values of **_cf_type_** and **_cf_source_column_data_type_** are listed in `CustomFieldType` and `ColumnDataType` enums in [models](../scripts/custom_fields/models/custom_data_object.py) - -## Usage - -Now that your environment and input files are set up, let's have a look at how to run the script 🚀. - -The script takes two positional arguments, which represent the paths to the input files we have discussed above. - -```shell -python scripts/custom_fields.py custom_datasets.csv custom_fields.csv -``` - -There is also an optional flag: `--no-relations-check`. It's meaning is discussed in the next section. - -### Check valid relations - -Regardless of whether the flag is used or not, the script will always start by loading and validating the data from the provided files. The script will then iterate through workspaces. - -#### If unused - -If `--no-relations-check` is not used, the script will: - -1. Store current workspace layout (analytical objects and LDM). -1. Check whether relations of metrics, visualizations and dashboards are valid. A set of current objects with invalid relations is created. -1. Push the updated LDM to GoodData Cloud. -1. Check object relations again. New set of objects with invalid relations is created. -1. The sets are compared. - - If there is more objects with invalid references in the new set, it means the objects were invalidated. Rollback is required. 
- - If the sets are not equal, rollback might be required - - If there is fewer invalid references or the sets are equal, rollback is not required -1. In case rollback is required, the initally stored workspace layout will be pushed to GoodData Cloud again, reverting changes to the workspace. - -#### If used - -If you decide to use the `--no-relations-check` flag, the script will simply validate the data and push the LDM extension to GoodData Cloud without any additional checks or rollbacks. - -```shell -python scripts/custom_fields.py custom_datasets.csv custom_fields.csv --no-relations-check -``` diff --git a/docs/RESTORE.md b/docs/RESTORE.md deleted file mode 100644 index 60b224f..0000000 --- a/docs/RESTORE.md +++ /dev/null @@ -1,82 +0,0 @@ -# GD Workspace backup restore -Tool which restores one, or more, workspace analytical model (AM), logical data models (LDM) and user data filters (UDF), filter views and automations from source backup archives in an incremental manner. - -The backups contain declarative definitions of AM, LDM and UDFs which are unarchived, loaded into memory and finally put into the target GD workspace. - -The restores are workspace-agnostic, which means that if you need to, you can import a backed-up of one workspace into a different workspace. - -## Usage -The tool requires the following arguments on input: -- `ws_csv` - a path to a csv file defining target workspace IDs to restore to, and a backup source paths -- `conf` - a path to a configuration file containing information required for accessing the backup source storage - -Use the tool like so: - -```sh -python scripts/restore.py ws_csv conf -``` - -Where ws_csv refers to input csv and conf to configuration file in yaml format. 
- -For example, if you have csv file named "example_input.csv" in the folder from which you are executing the python command and configuration file named "example_conf.yaml" in subfolder relative to the folder you are executing the script from named "subfolder", the execution could look like this: - -```sh -python scripts/restore.py example_input.csv subfolder/example_conf.yaml -``` - - -To show the help for using arguments, call: -```sh -python scripts/restore.py -h -``` - -## Configuration file (conf) -The configuration files lets you define which type of storage the restore tool will source the backups from, and any additional storage-specific information that might be required. Currently only AWS S3 is supported. - -The configuration file has the following format: -```yaml -storage_type: some_storage -storage: - arg1: foo - arg2: bar -``` - -### AWS S3 - -You can define the configuration file for S3 storage like so: - -```yaml -storage_type: s3 -storage: - bucket: some_bucket - backup_path: some/path/to/backups/gd_org_id/ - profile: services - aws_access_key_id: your-access-key-id - aws_secret_access_key: your-secret-access-key - aws_default_region: us-east-1 -``` -Here, the meaning of different `storage` fields is as follows: -- bucket - S3 storage bucket containing the backups -- backup_path - absolute path within the S3 bucket which leads to the root directory of the backups (the input csv file defines sources from here) -- profile (optional) - AWS profile to be used -- aws_access_key_id (optional) - AWS access key ID to be used -- aws_secret_access_key (optional) - AWS secret access key to be used -- aws_default_region (optional) - AWS region to be used - - -## Input CSV file (ws_csv) -The input CSV file defines the the targets and sources for backup restores (imports). 
- -Following format of the csv is expected: - -| workspace_id | path | -|--------------|------------------| -| ws_id_1 | path/to/backup_1 | -| ws_id_2 | path/to/backup_2 | -| ws_id_3 | path/to/backup_1 | - -Here, each `workspace_id` is the workspace ID of the workspace to perform the restore to. The `path` is the path (e.g. in S3) to a directory which contains the target backup archive (`gooddata_layouts.zip`). - -The `path` is then prefixed with a additional information (e.g. S3 bucket and backup_path to backups root dir). - -If something fails, please read over all ERROR log messages for information where the issue lies. \ No newline at end of file diff --git a/input.csv b/docs/examples/backup_and_restore/backup_input.csv similarity index 100% rename from input.csv rename to docs/examples/backup_and_restore/backup_input.csv diff --git a/configuration_local.yaml b/docs/examples/backup_and_restore/configuration_local.yaml similarity index 100% rename from configuration_local.yaml rename to docs/examples/backup_and_restore/configuration_local.yaml diff --git a/configuration_s3.yaml b/docs/examples/backup_and_restore/configuration_s3.yaml similarity index 100% rename from configuration_s3.yaml rename to docs/examples/backup_and_restore/configuration_s3.yaml diff --git a/docs/examples/backup_and_restore/restore_input.csv b/docs/examples/backup_and_restore/restore_input.csv new file mode 100644 index 0000000..bb31100 --- /dev/null +++ b/docs/examples/backup_and_restore/restore_input.csv @@ -0,0 +1,4 @@ +workspace_id,path +workspaceidtobeexportedone,path/to/backup/1 +workspaceidtobeexportedtwo,path/to/backup/2 +workspaceidtobeexportedthree,path/to/backup/3 diff --git a/docs/examples/user_mgmt/input.csv b/docs/examples/user_mgmt/input.csv index 7c38ea8..4d31f05 100644 --- a/docs/examples/user_mgmt/input.csv +++ b/docs/examples/user_mgmt/input.csv @@ -1,4 +1,4 @@ -user_id, firstname, lastname, email, auth_id, user_groups, is_active 
+user_id,firstname,lastname,email,auth_id,user_groups,is_active jozef.mrkva,jozef,mrkva,jozef.mrkva@test.com,auth_id_1,,True bartolomej.brokolica,,,,,,False peter.pertzlen,peter,pertzlen,peter.pertzlen@test.com,auth_id_3,ug_1|ug_2,True diff --git a/docs/SETUPATUHENTICATION.md b/docs/guides/SETUPAUTHENTICATION.md similarity index 96% rename from docs/SETUPATUHENTICATION.md rename to docs/guides/SETUPAUTHENTICATION.md index f2a869c..9985f16 100644 --- a/docs/SETUPATUHENTICATION.md +++ b/docs/guides/SETUPAUTHENTICATION.md @@ -6,21 +6,24 @@ This section contains step by step guide how to set up authorization files eithe ### GUI -In Finder, go to your current user home directory by pressing +In Finder, go to your current user home directory by pressing + ```sh CMD+SHIFT+H ``` And subsequently reveal hidden folders by + ```sh CMD+SHIFT+. ``` + Now you can create required folders and files manually. To hide folders afterwards, press the same combination. ### Terminal 1. Open Terminal -2. You should be in your current user home directory. You can check it by executing +2. You should be in your current user home directory. You can check it by executing ```sh pwd @@ -31,7 +34,7 @@ Expected result: /Users/{your_username} If thats not the case, run ```sh -cd ~ +cd ~ ``` Create directories `.aws` and `.gooddata` by executing following statements: @@ -50,6 +53,7 @@ nano .aws/credentials Populate the credentials file with appropriate credentials. Format: + ``` [default] aws_access_key_id = some_access_key_id @@ -59,8 +63,8 @@ aws_secret_access_key = some_access_key aws_access_key_id = other_access_key_id aws_secret_access_key = other_access_key ``` -Save by pressing ctrl+X, Y and Enter. +Save by pressing ctrl+X, Y and Enter. 
Now create create `profiles.yaml` file within the `.gooddata` folder: @@ -68,7 +72,8 @@ Now create create `profiles.yaml` file within the `.gooddata` folder: nano .gooddata/profiles.yaml ``` -Format: +Format: + ```yaml default: host: https://host.name.cloud.gooddata.com/ @@ -78,8 +83,8 @@ customer: host: https://customer.hostname.cloud.gooddata.com/ token: other_auth_token ``` -Save by pressing ctrl+X, Y and Enter. +Save by pressing ctrl+X, Y and Enter. ## Windows diff --git a/docs/reference/COMMON_ARGUMENTS.md b/docs/reference/COMMON_ARGUMENTS.md new file mode 100644 index 0000000..35ac6df --- /dev/null +++ b/docs/reference/COMMON_ARGUMENTS.md @@ -0,0 +1,117 @@ +# Common Arguments Reference + +This document provides detailed explanations for command-line arguments that are shared across multiple tools in this repository. + +## CSV File Arguments + +### `-d, --delimiter` + +**Type:** String +**Default:** `,` (comma) + +Column delimiter for the CSV files. Use this to define how the CSV is parsed. + +**Example:** + +```sh +python scripts/user_mgmt.py input.csv -d ";" +``` + +This would parse a CSV file that uses semicolons as delimiters instead of commas. + +--- + +### `-i, --inner-delimiter` + +**Type:** String +**Default:** `|` (pipe) +**Available in:** User management, User group management, Workspace management + +Delimiter used to separate multiple values within a single CSV column. This is used for columns that contain lists of items (e.g., multiple user groups, multiple parent groups, or multiple workspace data filter values). + +**Important:** The `--inner-delimiter` must differ from the `--delimiter`. 
+ +**Example:** + +If your CSV contains a column with multiple user groups like `admin|developer|tester`, you would use: + +```sh +python scripts/user_mgmt.py input.csv -i "|" +``` + +--- + +### `-q, --quotechar` + +**Type:** String +**Default:** `"` (double quote) + +Quotation character used to escape special characters (such as the delimiter) within the column field values. This is particularly useful when your data contains the delimiter character itself. + +**Escaping the quotechar:** If you need to include the quotechar itself within a field value, you must embed it in quotechars and then double the quotation character. + +**Example:** + +- Input with escaped quote: `"some""string"` +- Result after parsing: `some"string` + +**Usage:** + +```sh +python scripts/user_mgmt.py input.csv -q "'" +``` + +This would use single quotes as the quotation character instead of double quotes. + +--- + +## GoodData Profile Arguments + +### `-p, --profile-config` + +**Type:** Path +**Default:** `~/.gooddata/profiles.yaml` + +Path to the GoodData profile configuration file. The profile file contains authentication credentials for one or more GoodData instances. + +If not specified, the tool will look for the profiles file in the default location. + +**Example:** + +```sh +python scripts/user_mgmt.py input.csv -p /path/to/custom/profiles.yaml +``` + +For profile file format and detailed authentication setup, see the [Authentication Setup Guide](../guides/SETUPAUTHENTICATION.md). + +--- + +### `--profile` + +**Type:** String +**Default:** `default` + +Name of the GoodData profile to use from the profiles configuration file. This allows you to switch between different GoodData instances or credentials. + +**Example:** + +```sh +python scripts/user_mgmt.py input.csv --profile customer +``` + +This would use the `customer` profile defined in your profiles.yaml file instead of the default profile. 
+ +**Combined Example:** + +```sh +python scripts/user_mgmt.py input.csv -p /path/to/profiles.yaml --profile production +``` + +This command uses a custom profiles file and selects the `production` profile from it. + +--- + +## Notes + +- **Authentication Priority:** If both environment variables (`GDC_AUTH_TOKEN`, `GDC_HOSTNAME`) and profile configuration are provided, the environment variables take precedence. +- **Help Command:** All tools support the `-h` or `--help` flag to display available arguments and their descriptions. diff --git a/docs/reference/STORAGE_CONFIG.md b/docs/reference/STORAGE_CONFIG.md new file mode 100644 index 0000000..804f9e8 --- /dev/null +++ b/docs/reference/STORAGE_CONFIG.md @@ -0,0 +1,212 @@ +# Storage Configuration Reference + +This document provides detailed information about configuring storage backends for backup and restore operations. + +## Overview + +The backup and restore tools support multiple storage backends for saving and retrieving workspace backups. Currently supported storage types are: + +- **AWS S3** - Store backups in Amazon S3 buckets +- **Local** - Store backups in the local filesystem + +The storage configuration is defined in a YAML configuration file that you pass to the backup or restore tool. + +--- + +## Configuration File Format + +The basic structure of a storage configuration file is: + +```yaml +storage_type: +storage: + # Storage-specific configuration options +``` + +Additional optional parameters may be available depending on the tool (e.g., `api_page_size`, `batch_size` for backup operations). 
+ +--- + +## AWS S3 Configuration + +### Basic Configuration + +```yaml +storage_type: s3 +storage: + bucket: your-backup-bucket + backup_path: path/to/backups/ +``` + +### Full Configuration with Optional Parameters + +```yaml +storage_type: s3 +storage: + bucket: your-backup-bucket + backup_path: path/to/backups/ + profile: services + aws_access_key_id: your-access-key-id + aws_secret_access_key: your-secret-access-key + aws_default_region: us-east-1 +``` + +### Field Descriptions + +| Field | Required | Description | +| ----------------------- | -------- | ------------------------------------------------------------------- | +| `bucket` | Yes | S3 bucket name containing the backups | +| `backup_path` | Yes | Absolute path within the S3 bucket to the root directory of backups | +| `profile` | No | AWS profile name to use (from `~/.aws/credentials`) | +| `aws_access_key_id` | No | AWS access key ID for authentication | +| `aws_secret_access_key` | No | AWS secret access key for authentication | +| `aws_default_region` | No | AWS region where the bucket is located | + +### AWS Authentication + +The tools follow the standard [boto3 credential resolution](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) process. You can provide credentials through: + +1. Configuration file (as shown above) +2. Environment variables (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`) +3. AWS credentials file (`~/.aws/credentials`) +4. IAM roles (when running on AWS infrastructure) + +For detailed AWS authentication setup, see the main [README AWS section](../README.md#aws). 
+ +### Example: Backup to S3 + +**Configuration file (s3_config.yaml):** + +```yaml +storage_type: s3 +storage: + bucket: my-gooddata-backups + backup_path: production/backups/ + profile: production-aws +``` + +**Usage:** + +```sh +python scripts/backup.py workspaces.csv s3_config.yaml +``` + +--- + +## Local Storage Configuration + +### Configuration + +```yaml +storage_type: local +storage: +``` + +### Behavior + +- **Backup operations:** Backups are saved to `./local_backups/` directory relative to where the script is executed +- **Restore operations:** Backups are read from `./local_backups/` directory relative to where the script is executed + +**Note:** For backup operations, the number of existing backups in the `./local_backups/` folder may affect script performance. + +### Example: Backup to Local Storage + +**Configuration file (local_config.yaml):** + +```yaml +storage_type: local +storage: +``` + +**Usage:** + +```sh +python scripts/backup.py workspaces.csv local_config.yaml +``` + +This will create backups in `./local_backups/` in your current working directory. + +--- + +## Backup-Specific Configuration Options + +When using the backup tool, additional configuration options are available: + +### API Page Size + +Controls the batch size when fetching workspace IDs from the GoodData API (used with `list-of-parents` or `entire-organization` input types). + +**Default:** `100` + +```yaml +storage_type: s3 +storage: + bucket: my-bucket + backup_path: backups/ +api_page_size: 200 +``` + +### Batch Size + +Determines how many workspaces are processed before saving the backups to storage. 
+ +**Default:** `100` + +```yaml +storage_type: s3 +storage: + bucket: my-bucket + backup_path: backups/ +batch_size: 50 +``` + +### Complete Example + +```yaml +storage_type: s3 +storage: + bucket: gooddata-prod-backups + backup_path: org_123/backups/ + profile: production + aws_default_region: eu-west-1 +api_page_size: 150 +batch_size: 25 +``` + +--- + +## Configuration File Examples + +Example configuration files can be found in: + +- [examples/backup_and_restore/configuration_s3.yaml](../examples/backup_and_restore/configuration_s3.yaml) +- [examples/backup_and_restore/configuration_local.yaml](../examples/backup_and_restore/configuration_local.yaml) + +--- + +## Troubleshooting + +### S3 Access Issues + +If you encounter permission errors when accessing S3: + +1. Verify your AWS credentials are correctly configured +2. Ensure the IAM user/role has appropriate S3 permissions: + - `s3:GetObject` (for restore) + - `s3:PutObject` (for backup) + - `s3:ListBucket` (for both) +3. Check that the bucket name and region are correct +4. Verify the `backup_path` exists and is accessible + +### Local Storage Issues + +If backups fail with local storage: + +1. Ensure you have write permissions in the current directory +2. Check available disk space +3. Verify the `./local_backups/` directory can be created + +For additional help, check the tool-specific documentation: + +- [Backup Tool Documentation](../tools/BACKUP.md) +- [Restore Tool Documentation](../tools/RESTORE.md) diff --git a/docs/tools/BACKUP.md b/docs/tools/BACKUP.md new file mode 100644 index 0000000..f1c2ee3 --- /dev/null +++ b/docs/tools/BACKUP.md @@ -0,0 +1,136 @@ +# GD Workspace Backup + +Tool which exports / creates a backup of one or more workspaces - their logical data model (LDM), analytical model (AM), user data filters (UDF), filter views and automations. Backups are stored either locally or can be uploaded to an S3 bucket. 
+ +## Usage + +The tool requires the following arguments on input: + +- `ws_csv` - a path to a CSV file defining workspace IDs to back up (required unless using `entire-organization` input type) +- `conf` - a path to a configuration file containing information required for accessing the backup storage + +### Input type + +The `input-type` argument is optional and has three options: + +`list-of-workspaces` is the default option. The data in the input file is treated as an exhaustive list of workspaces to back up. This is also what will happen if the argument is omitted entirely. + +Use `list-of-parents` when the input file contains a list of parent workspaces and you wish to back up the entire hierarchy. For each workspace ID in the input, all of its direct and indirect children are included in the backup, as well as the parent workspaces themselves. + +If the `entire-organization` option is selected, the script will back up all the workspaces within the organization. If this option is selected, you do not need to provide the `ws_csv` argument as it will be ignored. If a `ws_csv` value is provided, the script will log a warning message, but will proceed to back up the organization. + +### Usage examples + +If you want to back up a list of specific workspaces, run: + +```sh +python scripts/backup.py ws_csv conf +``` + +Where `ws_csv` refers to the input CSV file and `conf` to the configuration file in YAML format.
This would be equivalent to running: + +```sh +python scripts/backup.py ws_csv conf -t list-of-workspaces +``` + +For example, if you have a CSV file named "example_input.csv" in the folder from which you are executing the Python command and a configuration file named "example_conf.yaml" in a subfolder named "subfolder", the execution could look like this: + +```sh +python scripts/backup.py example_input.csv subfolder/example_conf.yaml +``` + +If you want to back up a specific hierarchy under a parent workspace, prepare the list of parents, store it in a CSV file (named for example `parents.csv`) and run: + +```sh +python scripts/backup.py parents.csv conf.yaml -t list-of-parents +``` + +If you want to back up all the workspaces in the organization, run: + +```sh +python scripts/backup.py conf.yaml -t entire-organization +``` + +Note that in this case, you do not need to provide the `ws_csv` argument as no list is required. + +To show the help for using arguments, call: + +```sh +python scripts/backup.py -h +``` + +### Optional arguments + +The following optional arguments are available: + +- `-t, --input-type` - Specification of how the input file is handled. Options: `list-of-workspaces` (default), `list-of-parents`, `entire-organization`. See the Input Type section above for details. +- `-p, --profile-config` - Path to GoodData profile configuration file. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-p---profile-config) for details. +- `--profile` - Name of GoodData profile to use. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#--profile) for details. + +Example with optional arguments: + +```sh +python scripts/backup.py input.csv conf.yaml -t list-of-parents -p path/to/profiles.yaml --profile customer +``` + +## Configuration file (conf) + +The configuration file defines which type of storage the export tool will save the backups to, and any additional storage-specific information that might be required. 
Currently AWS S3 and local storage are supported. + +### Backup-Specific Options + +If you run the script with `list-of-parents` or `entire-organization`, the script will fetch the IDs of workspaces to process in batches. You can configure: + +- `api_page_size` - Batch size for fetching workspace IDs from the GoodData API. Default: `100` +- `batch_size` - Number of workspaces to process before saving backups to storage. Default: `100` + +### Configuration Format + +```yaml +storage_type: s3 # or 'local' +storage: + # Storage-specific configuration (see Storage Config documentation) +api_page_size: 1000 # optional +batch_size: 20 # optional +``` + +### Storage Configuration + +For detailed information on configuring AWS S3 or local storage, including all available options and examples, see the [Storage Configuration Reference](../reference/STORAGE_CONFIG.md). + +**Quick examples:** + +**S3 Storage:** + +```yaml +storage_type: s3 +storage: + bucket: my-backup-bucket + backup_path: backups/ +``` + +**Local Storage:** + +```yaml +storage_type: local +storage: +``` + +See [../examples/backup_and_restore](../examples/backup_and_restore/) for complete configuration file examples. + +## Input CSV file (ws_csv) + +The input CSV file defines the workspaces to be backed up (exported). + +The following CSV format is expected: + +| workspace_id | +| ------------ | +| ws_id_1 | +| ws_id_2 | +| ws_id_3 | + +Here, each `workspace_id` is the workspace ID of the workspace to perform the export on. +If the defined workspace does not exist in the target organization, this information will be reported in an ERROR log message. If something fails, please read over all ERROR log messages for information where the issue lies. + +You can find an example of the input file ([backup_input.csv](../examples/backup_and_restore/backup_input.csv)) in _docs/examples/backup_and_restore_.
diff --git a/docs/tools/CUSTOM_FIELDS.md b/docs/tools/CUSTOM_FIELDS.md new file mode 100644 index 0000000..057d4eb --- /dev/null +++ b/docs/tools/CUSTOM_FIELDS.md @@ -0,0 +1,123 @@ +# Custom Field Management + +The `scripts/custom_fields.py` script will allow you to extend the Logical Data Model (LDM) of a child workspace by adding extra datasets which are not present in the parent workspace's LDM. + +## Input files + +The script works with input from two CSV files. These files should contain (a) custom dataset definitions and (b) custom field definitions. + +The custom dataset defines the dataset entity, i.e., the box you would see in the GoodData Cloud UI. The custom fields, on the other hand, define the individual fields in that dataset. You can imagine it as first defining a table and then its columns. + +Multiple datasets and fields can be defined in the files. However, the files need to be consistent with each other - you cannot define fields for datasets that are not defined in the datasets file. + +### Custom dataset definitions + +The first file contains the definitions of the datasets you want to create.
It should have the following structure: + +| workspace_id | dataset_id | dataset_name | dataset_datasource_id | dataset_source_table | dataset_source_sql | parent_dataset_reference | parent_dataset_reference_attribute_id | dataset_reference_source_column | dataset_reference_source_column_data_type | wdf_id | wdf_column_name | +| -------------------- | ----------------- | -------------------- | --------------------- | -------------------- | ------------------ | ------------------------ | ------------------------------------- | ------------------------------- | ----------------------------------------- | ------ | --------------- | +| child_workspace_id_1 | custom_dataset_id | Custom Dataset Title | datasource_id | dataset_source_table | | parent_dataset_id | parent_dataset.reference_field | custom_dataset.reference_field | column data type | wdf_id | wdf_column_name | + +#### Validity constraints + +- The `dataset_source_table` and `dataset_source_sql` are mutually exclusive. Only one of those should be filled in, the other should be null (empty value). In case both values are present, the script will throw an error. + +- `workspace_id` + `dataset_id` combination must be unique across all dataset definitions. + +#### Description and Example Values + +Below is a description of each field present in the custom dataset definition CSV. Use the table for guidance on what values to use in your file, and refer to the Example Value column for clarity. Each column must be filled in according to the requirements outlined.
+ +| Field | Example Value | Description | +| ------------------------------------------- | -------------------------- | --------------------------------------------------------------------------------------------------------------- | +| `workspace_id` | child_workspace_id_1 | Child workspace id | +| `dataset_id` | custom_dataset_id | Custom dataset id | +| `dataset_name` | Custom Dataset Title | Custom dataset name | +| `dataset_datasource_id` | datasource_id | Data source id (can be found in UI under "manage files") | +| `dataset_source_table` | dataset_source_table | Name of the table in the physical data model | +| `dataset_source_sql` | _(leave empty or provide)_ | SQL query defining the dataset (should be empty if above is filled) | +| `parent_dataset_reference` | products | ID of the parent dataset to which this custom dataset will be connected | +| `parent_dataset_reference_attribute_id` | products.product_id | Parent dataset column name used for the "join" | +| `dataset_reference_source_column` | product_id | Custom dataset column name used for the "join" | +| `dataset_reference_source_column_data_type` | STRING | See [ColumnDataType](https://www.gooddata.com/docs/python-sdk/latest/pipelines/ldm_extension/#customfieldtype). 
| +| `wdf_id` | x\_\_client_id | Workspace data filter id | +| `wdf_column_name` | client_id | Name of the column used for filtering | + +### Custom fields definition + +The individual fields of the custom dataset are defined as follows: + +| workspace_id | dataset_id | cf_id | cf_name | cf_type | cf_source_column | cf_source_column_data_type | +| -------------------- | ----------------- | --------------- | ----------------- | --------- | -------------------------- | -------------------------- | +| child_workspace_id_1 | custom_dataset_id | custom_field_id | Custom Field Name | attribute | custom_field_source_column | INT | + +#### Validity constraints + +The custom field definitions must comply with these criteria: + +- **attributes** and **facts**: unique `workspace_id` + `cf_id` combinations +- **dates**: unique `dataset_id` and `cf_id` combinations + +#### Description and Example Values + +Below is a description of each field present in the custom field definition CSV. Use the table for guidance on what values to use in your file, and refer to the Example Value column for clarity. Each column must be filled in according to the requirements outlined.
+ +| Field | Example Value | Description | +| ---------------------------- | -------------------------- | --------------------------------------------------------------------------------------------------------------- | +| `workspace_id` | child_workspace_id_1 | Child workspace ID | +| `dataset_id` | custom_dataset_id | Custom dataset ID | +| `cf_id` | custom_field_id | Custom field ID | +| `cf_name` | Custom Field Name | Custom field name | +| `cf_type` | attribute | See [CustomFieldType](https://www.gooddata.com/docs/python-sdk/latest/pipelines/ldm_extension/#customfieldtype) | +| `cf_source_column` | custom_field_source_column | Name of the column in the physical data model | +| `cf_source_column_data_type` | INT | See [ColumnDataType](https://www.gooddata.com/docs/python-sdk/latest/pipelines/ldm_extension/#customfieldtype) | + +## Usage + +The script requires two positional arguments, which represent the paths to the input files discussed above: + +```shell +python scripts/custom_fields.py custom_datasets.csv custom_fields.csv +``` + +### Optional arguments + +The following optional arguments are available: + +- `-d, --delimiter` - Column delimiter for the CSV files. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-d---delimiter) for details. +- `-q, --quotechar` - Quotation character for escaping special characters. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-q---quotechar) for details. +- `-p, --profile-config` - Path to GoodData profile configuration file. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-p---profile-config) for details. +- `--profile` - Name of GoodData profile to use. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#--profile) for details. +- `--no-relations-check` - Skip relations check after updating LDM. If used, the script will not verify object relations or perform rollback if issues are found. 
+ +Example with optional arguments: + +```shell +python scripts/custom_fields.py custom_datasets.csv custom_fields.csv -p path/to/profiles.yaml --profile customer --no-relations-check +``` + +### Check valid relations + +Regardless of whether the flag is used or not, the script will always start by loading and validating the data from the provided files. The script will then iterate through workspaces. + +#### If unused + +If `--no-relations-check` is not used, the script will: + +1. Store current workspace layout (analytical objects and LDM). +1. Check whether relations of metrics, visualizations and dashboards are valid. A set of current objects with invalid relations is created. +1. Push the updated LDM to GoodData Cloud. +1. Check object relations again. A new set of objects with invalid relations is created. +1. The sets are compared. + - If there are more objects with invalid references in the new set, it means the objects were invalidated. Rollback is required. + - If the sets are not equal, rollback might be required. + - If there are fewer invalid references or the sets are equal, rollback is not required. +1. In case rollback is required, the initially stored workspace layout will be pushed to GoodData Cloud again, reverting changes to the workspace. + +#### If used + +If you decide to use the `--no-relations-check` flag, the script will simply validate the data and push the LDM extension to GoodData Cloud without any additional checks or rollbacks.
+ +```shell +python scripts/custom_fields.py custom_datasets.csv custom_fields.csv --no-relations-check +``` diff --git a/docs/PERMISSION_MGMT.md b/docs/tools/PERMISSION_MGMT.md similarity index 66% rename from docs/PERMISSION_MGMT.md rename to docs/tools/PERMISSION_MGMT.md index c74eb51..f5c6a72 100644 --- a/docs/PERMISSION_MGMT.md +++ b/docs/tools/PERMISSION_MGMT.md @@ -8,11 +8,16 @@ Goal of the tool is to help manage state of the user-workspace or userGroup-work The tool requires the following argument on input: -- `perm_csv` - a path to a csv file defining workspace permissions bound to specific ws_id-user or ws_id-userGroup pairs and the permissions isActive state +- `perm_csv` - a path to a CSV file defining workspace permissions bound to specific ws_id-user or ws_id-userGroup pairs and the permissions isActive state -Some other, _optional_, arguments are: +### Optional arguments -- `-d | --delimiter` - column delimiter for the csv files. Use this to define how the csv is parsed. Default value is "`,`" +The following optional arguments are available: + +- `-d, --delimiter` - Column delimiter for the CSV files. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-d---delimiter) for details. +- `-q, --quotechar` - Quotation character for escaping special characters. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-q---quotechar) for details. +- `-p, --profile-config` - Path to GoodData profile configuration file. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-p---profile-config) for details. +- `--profile` - Name of GoodData profile to use. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#--profile) for details. Use the tool like so: @@ -20,7 +25,7 @@ Use the tool like so: python scripts/permission_mgmt.py perm_csv ``` -Where `perm_csv` refers to input csv. +Where `perm_csv` refers to the input CSV file. 
If you would like to define custom delimiter, use the tool like so: @@ -28,6 +33,12 @@ If you would like to define custom delimiter, use the tool like so: python scripts/permission_mgmt.py perm_csv -d "," ``` +To use a custom GoodData profile, use: + +```sh +python scripts/permission_mgmt.py perm_csv -p path/to/profiles.yaml --profile customer +``` + To show the help for using arguments, call: ```sh @@ -38,9 +49,9 @@ python scripts/permission_mgmt.py -h The input CSV file defines the workspace permissions which you might want to manage. -[Example input csv.](examples/permission_mgmt/input.csv) +[Example input CSV.](../examples/permission_mgmt/input.csv) -Following format of the csv is expected: +The following CSV format is expected: | user_id | ug_id | ws_id | ws_permissions | is_active | | ------- | ----- | ------- | -------------- | --------- | diff --git a/docs/tools/RESTORE.md b/docs/tools/RESTORE.md new file mode 100644 index 0000000..78c6d3f --- /dev/null +++ b/docs/tools/RESTORE.md @@ -0,0 +1,93 @@ +# GD Workspace Restore + +Tool which restores one or more workspace analytical models (AM), logical data models (LDM), user data filters (UDF), filter views and automations from source backup archives in an incremental manner. + +The backups contain declarative definitions of AM, LDM and UDFs which are unarchived, loaded into memory and finally put into the target GD workspace. + +The restores are workspace-agnostic, which means that if you need to, you can import a backup of one workspace into a different workspace.
+ +## Usage + +The tool requires the following arguments on input: + +- `ws_csv` - a path to a CSV file defining target workspace IDs to restore to, and backup source paths +- `conf` - a path to a configuration file containing information required for accessing the backup source storage + +Use the tool like so: + +```sh +python scripts/restore.py ws_csv conf +``` + +Where `ws_csv` refers to the input CSV file and `conf` to the configuration file in YAML format. + +For example, if you have a CSV file named "example_input.csv" in the folder from which you are executing the Python command and a configuration file named "example_conf.yaml" in a subfolder named "subfolder", the execution could look like this: + +```sh +python scripts/restore.py example_input.csv subfolder/example_conf.yaml +``` + +To show the help for using arguments, call: + +```sh +python scripts/restore.py -h +``` + +### Optional arguments + +The following optional arguments are available: + +- `-p, --profile-config` - Path to GoodData profile configuration file. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-p---profile-config) for details. +- `--profile` - Name of GoodData profile to use. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#--profile) for details. + +Example with optional arguments: + +```sh +python scripts/restore.py input.csv conf.yaml -p path/to/profiles.yaml --profile customer +``` + +## Configuration file (conf) + +The configuration file defines which type of storage the restore tool will source the backups from, and any additional storage-specific information that might be required. Currently AWS S3 and local storage are supported. + +For detailed information on configuring AWS S3 or local storage, including all available options and examples, see the [Storage Configuration Reference](../reference/STORAGE_CONFIG.md). 
+ +**Quick examples:** + +**S3 Storage:** + +```yaml +storage_type: s3 +storage: + bucket: my-backup-bucket + backup_path: backups/ +``` + +**Local Storage:** + +```yaml +storage_type: local +storage: +``` + +See [../examples/backup_and_restore](../examples/backup_and_restore/) for complete configuration file examples. + +## Input CSV file (ws_csv) + +The input CSV file defines the targets and sources for backup restores (imports). + +The following CSV format is expected: + +| workspace_id | path | +| ------------ | ---------------- | +| ws_id_1 | path/to/backup_1 | +| ws_id_2 | path/to/backup_2 | +| ws_id_3 | path/to/backup_1 | + +Here, each `workspace_id` is the workspace ID of the workspace to perform the restore to. The `path` is the path (e.g. in S3) to a directory which contains the target backup archive (`gooddata_layouts.zip`). + +The `path` is then prefixed with additional information (e.g. S3 bucket and backup_path to backups root dir). + +You can find an example of the input file ([restore_input.csv](../examples/backup_and_restore/restore_input.csv)) in _docs/examples/backup_and_restore_. + +If something fails, please read over all ERROR log messages for information where the issue lies. diff --git a/docs/USER_DATA_FILTER_MGMT.md b/docs/tools/USER_DATA_FILTER_MGMT.md similarity index 62% rename from docs/USER_DATA_FILTER_MGMT.md rename to docs/tools/USER_DATA_FILTER_MGMT.md index d0b90a4..c85bf23 100644 --- a/docs/USER_DATA_FILTER_MGMT.md +++ b/docs/tools/USER_DATA_FILTER_MGMT.md @@ -8,16 +8,18 @@ User Data Filters can be created, updated, and deleted based on CSV input.
The tool requires the following arguments on input: -- `filepath` - a path to a csv file defining user data filters, their values, and target workspace +- `filepath` - a path to a CSV file defining user data filters, their values, and target workspace - `ldm_column_name` - LDM column name - `maql_column_name` - MAQL column name in the form `{attribute/dataset.field}` -Some other, _optional_, arguments are: +### Optional arguments -- `-d | --delimiter` - column delimiter for the csv files. Use this to define how the csv is parsed. Default value is `,` -- `-q | --quotechar` - quotation character used to escape special characters (such as the delimiter) within the column field value. Default value is `"` If you need to escape the quotechar itself, you have to embed it in quotechars and then double the quotation character (e.g.: `"some""string"` will yield `some"string`). -- `-p | --profile-config` - optional path to GoodData profile config. If no path is provided, the default profiles file is used. -- `--profile` - GoodData profile to use. If no profile is provided, `default` is used. +The following optional arguments are available: + +- `-d, --delimiter` - Column delimiter for the CSV files. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-d---delimiter) for details. +- `-q, --quotechar` - Quotation character for escaping special characters. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-q---quotechar) for details. +- `-p, --profile-config` - Path to GoodData profile configuration file. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-p---profile-config) for details. +- `--profile` - Name of GoodData profile to use. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#--profile) for details. 
Use the tool like so: @@ -31,6 +33,12 @@ If you would like to define custom delimiters, use the tool like so: python scripts/user_data_filter_mgmt.py path/to/udfs.csv ldm_column_name maql_column_name -d "," ``` +To use a custom GoodData profile, use: + +```sh +python scripts/user_data_filter_mgmt.py path/to/udfs.csv ldm_column_name maql_column_name -p path/to/profiles.yaml --profile customer +``` + To show the help for using arguments, call: ```sh @@ -41,7 +49,7 @@ python scripts/user_data_filter_mgmt.py -h The input CSV file defines the user data filter values to be managed. All user data filters in all workspaces listed in the input will be overwritten based on the CSV content. -Following format of the csv is expected: +The following CSV format is expected: | workspace_id | udf_id | udf_value | | ------------------------- | --------- | --------- | diff --git a/docs/USER_GROUP_MGMT.md b/docs/tools/USER_GROUP_MGMT.md similarity index 60% rename from docs/USER_GROUP_MGMT.md rename to docs/tools/USER_GROUP_MGMT.md index 6055129..ebc07fc 100644 --- a/docs/USER_GROUP_MGMT.md +++ b/docs/tools/USER_GROUP_MGMT.md @@ -8,11 +8,15 @@ The tool requires the following argument: - `user_group_csv` - a path to a CSV file that defines the user groups, their names, parent user groups, and active status. -Optional arguments include: +### Optional arguments -- `-d | --delimiter` - column delimiter for the CSV files. This defines how the CSV is parsed. The default value is "`,`". -- `-u | --ug_delimiter` - delimiter used to separate different parent user groups within the parent user group column. This must differ from the "delimiter" argument. Default value is "`|`". -- `-q | --quotechar` - quotation character used to escape special characters (such as the delimiter) within the column values. The default value is '`"`'. 
If you need to escape the quotechar itself, you have to embed it in quotechars and then double the quotation character (e.g.: `"some""string"` will yield `some"string`). +The following optional arguments are available: + +- `-d, --delimiter` - Column delimiter for the CSV files. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-d---delimiter) for details. +- `-i, --inner-delimiter` - Delimiter for parent user groups within a column. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-i---inner-delimiter) for details. +- `-q, --quotechar` - Quotation character for escaping special characters. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-q---quotechar) for details. +- `-p, --profile-config` - Path to GoodData profile configuration file. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-p---profile-config) for details. +- `--profile` - Name of GoodData profile to use. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#--profile) for details. Use the tool like so: @@ -25,7 +29,13 @@ Where `user_group_csv` refers to the input CSV file. For custom delimiters, use the command: ```sh -python scripts/user_group_mgmt.py user_group_csv -d "," -u "|" +python scripts/user_group_mgmt.py user_group_csv -d "," -i "|" +``` + +To use a custom GoodData profile, use: + +```sh +python scripts/user_group_mgmt.py user_group_csv -p path/to/profiles.yaml --profile customer ``` To display help for using arguments, run: @@ -38,9 +48,9 @@ python scripts/user_group_mgmt.py -h The input CSV file defines the user groups to be managed. User groups not defined in the input file will not be modified. 
-[Example input CSV.](examples/user_group_mgmt/input.csv) +[Example input CSV.](../examples/user_group_mgmt/input.csv) -Expected CSV format: +The following CSV format is expected: | user_group_id | user_group_name | parent_user_groups | is_active | | ------------- | --------------- | ------------------ | --------- | diff --git a/docs/USER_MGMT.md b/docs/tools/USER_MGMT.md similarity index 62% rename from docs/USER_MGMT.md rename to docs/tools/USER_MGMT.md index 28f1b2e..82c3180 100644 --- a/docs/USER_MGMT.md +++ b/docs/tools/USER_MGMT.md @@ -1,6 +1,6 @@ # GD User Management -Tool which helps manage user entities in an GoodData organization. +Tool which helps manage user entities in a GoodData organization. Users can be created, updated, and deleted. This includes creation of any new userGroups which would be provided in user details. @@ -8,13 +8,17 @@ Users can be created, updated, and deleted. This includes creation of any new us The tool requires the following argument on input: -- `user_csv` - a path to a csv file defining user entities, their relevant attributes, userGroup memberships, and isActive state +- `user_csv` - a path to a CSV file defining user entities, their relevant attributes, userGroup memberships, and isActive state -Some other, _optional_, arguments are: +### Optional arguments -- `-d | --delimiter` - column delimiter for the csv files. Use this to define how the csv is parsed. Default value is "`,`" -- `-u | --ug_delimiter` - userGroups column value delimiter. Use this to separate the different userGroups defined in the userGroup column. Default value is "`|`". Note that `--delimiter` and `--ug_delimiter` have to differ. -- `-q | --quotechar` - quotation character used to escape special characters (such as the delimiter) within the column field value. Default value is '`"`' If you need to escape the quotechar itself, you have to embed it in quotechars and then double the quotation character (e.g.: `"some""string"` will yield `some"string`). 
+The following optional arguments are available: + +- `-d, --delimiter` - Column delimiter for the CSV files. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-d---delimiter) for details. +- `-i, --inner-delimiter` - UserGroups column value delimiter. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-i---inner-delimiter) for details. +- `-q, --quotechar` - Quotation character for escaping special characters. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-q---quotechar) for details. +- `-p, --profile-config` - Path to GoodData profile configuration file. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-p---profile-config) for details. +- `--profile` - Name of GoodData profile to use. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#--profile) for details. Use the tool like so: @@ -22,12 +26,18 @@ Use the tool like so: python scripts/user_mgmt.py user_csv ``` -Where `user_csv` refers to input csv. +Where `user_csv` refers to the input CSV file. If you would like to define custom delimiters, use the tool like so: ```sh -python scripts/user_mgmt.py user_csv -d "," -u "|" +python scripts/user_mgmt.py user_csv -d "," -i "|" +``` + +To use a custom GoodData profile, use: + +```sh +python scripts/user_mgmt.py user_csv -p path/to/profiles.yaml --profile customer ``` To show the help for using arguments, call: @@ -40,9 +50,9 @@ python scripts/user_mgmt.py -h The input CSV file defines the user entities which you might want to manage. Note that GD organization users that are not defined in the input will not be modified in any way. 
-[Example input csv.](examples/user_mgmt/input.csv) +[Example input CSV.](../examples/user_mgmt/input.csv) -Following format of the csv is expected: +The following CSV format is expected: | user_id | firstname | lastname | email | auth_id | user_groups | is_active | | -------------------- | --------- | -------- | ----------------------- | --------- | ----------- | --------- | diff --git a/docs/WORKSPACE_MGMT.md b/docs/tools/WORKSPACE_MGMT.md similarity index 63% rename from docs/WORKSPACE_MGMT.md rename to docs/tools/WORKSPACE_MGMT.md index 77d2a03..e269d07 100644 --- a/docs/WORKSPACE_MGMT.md +++ b/docs/tools/WORKSPACE_MGMT.md @@ -1,6 +1,6 @@ # GD Workspace Management -Tool which helps manage child workspace entities in an GoodData organization. +Tool which helps manage child workspace entities in a GoodData organization. Workspaces can be created, updated, and deleted. This includes applying Workspace Data Filter values, when provided in input. @@ -8,15 +8,17 @@ Workspaces can be created, updated, and deleted. This includes applying Workspac The tool requires the following argument on input: -- `filepath` - a path to a csv file defining workspace entities, their relevant attributes, workspace data filter configuration, and isActive state +- `filepath` - a path to a CSV file defining workspace entities, their relevant attributes, workspace data filter configuration, and isActive state -Some other, _optional_, arguments are: +### Optional arguments -- `-d | --delimiter` - column delimiter for the csv files. Use this to define how the csv is parsed. Default value is `,` -- `-i | --inner-delimiter` - Workspace Data Filter values column delimiter. Use this to separate the different values defined in the `workspace_data_filter_values` column. Default value is `|`. Note that `--delimiter` and `--inner_delimiter` have to differ. -- `-q | --quotechar` - quotation character used to escape special characters (such as the delimiter) within the column field value. 
Default value is `"` If you need to escape the quotechar itself, you have to embed it in quotechars and then double the quotation character (e.g.: `"some""string"` will yield `some"string`). -- `-p | --profile-config` - optional path to GoodData profile config. If no path is provided, the default profiles file is used. -- `--profile` - GoodData profile to use. If no profile is provided, `default` is used. +The following optional arguments are available: + +- `-d, --delimiter` - Column delimiter for the CSV files. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-d---delimiter) for details. +- `-i, --inner-delimiter` - Workspace Data Filter values column delimiter. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-i---inner-delimiter) for details. +- `-q, --quotechar` - Quotation character for escaping special characters. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-q---quotechar) for details. +- `-p, --profile-config` - Path to GoodData profile configuration file. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#-p---profile-config) for details. +- `--profile` - Name of GoodData profile to use. See [Common Arguments](../reference/COMMON_ARGUMENTS.md#--profile) for details. Use the tool like so: @@ -30,6 +32,12 @@ If you would like to define custom delimiters, use the tool like so: python scripts/workspace_mgmt.py path/to/workspace_definitions.csv -d "," -i "|" ``` +To use a custom GoodData profile, use: + +```sh +python scripts/workspace_mgmt.py path/to/workspace_definitions.csv -p path/to/profiles.yaml --profile customer +``` + To show the help for using arguments, call: ```sh @@ -40,7 +48,7 @@ python scripts/workspace_mgmt.py -h The input CSV file defines the workspace entities which you might want to manage. Note that GD organization workspaces that are not defined in the input will not be modified in any way. 
-Following format of the csv is expected: +The following CSV format is expected: | parent_id | workspace_id | workspace_name | workspace_data_filter_id | workspace_data_filter_values | is_active | | ------------------- | ---------------------------- | ---------------------------- | ------------------------ | ---------------------------- | --------- |