diff --git a/.gitignore b/.gitignore index 3ac522c..7857752 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,8 @@ cover/ **/local_backups/** **/*.yaml **/*.csv +**.log +**.env # Include sample and test config files !tests/data/backup/test_local_conf.yaml \ No newline at end of file diff --git a/scripts/backup.py b/scripts/backup.py index 10d869e..06b8ea5 100644 --- a/scripts/backup.py +++ b/scripts/backup.py @@ -3,6 +3,7 @@ import argparse import datetime import json +import logging import os import shutil import tempfile @@ -21,7 +22,10 @@ GDApi, GoodDataRestApiError, ) -from utils.logger import logger # type: ignore[import] +from utils.logger import setup_logging # type: ignore[import] + +setup_logging() +logger = logging.getLogger("backup") TIMESTAMP_SDK_FOLDER = ( str(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")) @@ -301,10 +305,7 @@ def get_workspace_export( user_data_filters = get_user_data_filters(api, ws_id) if not user_data_filters: - logger.error( - f"Skipping backup of {ws_id} - user data filters returned None." - ) - logger.error(f"Check if {ws_id} exists and the API is functional") + logger.error(f"Skipping backup of {ws_id} - check if workspace exists.") continue try: @@ -320,8 +321,7 @@ def get_workspace_export( if not exported: raise RuntimeError( - "None of the workspaces were exported." - "Check source file and their existence." + "None of the workspaces were exported. Check source file and their existence." ) @@ -393,11 +393,18 @@ def main(args: argparse.Namespace) -> None: storage_class: Type[BackupStorage] = get_storage(conf.storage_type) storage: BackupStorage = storage_class(conf) + # TODO: if storage set to S3, check that valid connection can be established + # currently the script would gather the exports and only then fail to upload them + loader = InputLoader(api, conf.api_page_size) workspaces_to_export: list[str] = loader.get_ids_to_backup( args.input_type, args.ws_csv ) + if not workspaces_to_export: + logger.error("No workspaces to export. Check the input file or the input type.") + return + with tempfile.TemporaryDirectory() as tmpdir: get_workspace_export(sdk, api, tmpdir, org_id, workspaces_to_export) @@ -409,5 +416,11 @@ def main(args: argparse.Namespace) -> None: if __name__ == "__main__": parser: argparse.ArgumentParser = create_parser() args: argparse.Namespace = parser.parse_args() - validate_args(args) - main(args) + + try: + validate_args(args) + main(args) + + logger.info("Backup completed.") + except Exception as e: + logger.error(f"Backup failed: {e}") diff --git a/scripts/utils/backup_utils/input_loader.py b/scripts/utils/backup_utils/input_loader.py index 3f0df38..393fc5b 100644 --- a/scripts/utils/backup_utils/input_loader.py +++ b/scripts/utils/backup_utils/input_loader.py @@ -1,5 +1,6 @@ # (C) 2025 GoodData Corporation import csv +import logging from dataclasses import dataclass from typing import Iterator @@ -9,12 +10,13 @@ GoodDataRestApiError, MaybeResponse, ) -from utils.logger import logger # type: ignore[import] from utils.models.workspace_response import ( # type: ignore[import] Workspace, WorkspaceResponse, ) +logger = logging.getLogger(__name__) + class InputLoader: """Class to handle loading and parsing the input data.""" @@ -145,6 +147,11 @@ def get_hierarchy(self, parent_id: str) -> list[str]: for subparent in sub_parents: all_children += self.get_hierarchy(subparent) + if not all_children: + logger.warning( + f"No child workspaces found for parent workspace ID: {parent_id}" + ) + return all_children def get_all_workspaces(self) -> list[str]: @@ -163,6 +170,9 @@ def get_all_workspaces(self) -> list[str]: for result in results: all_workspaces.extend(result.workspace_ids) + if not all_workspaces: + logger.warning("No workspaces found in the organization.") + return all_workspaces def get_ids_to_backup(self, input_type: str, path_to_csv: str) -> list[str]: @@ -178,18 +188,11 @@ def get_ids_to_backup(self, input_type: str, path_to_csv: str) -> list[str]: for parent in list_of_parents: list_of_children.extend(self.get_hierarchy(parent)) - if not list_of_children: - raise RuntimeError( - "No child workspaces found for the provided list of parents." - ) - # Include the parent workspaces in the backup return list_of_parents + list_of_children if input_type == "entire-organization": list_of_workspaces = self.get_all_workspaces() - if not list_of_workspaces: - raise RuntimeError("No workspaces found in the organization.") return list_of_workspaces raise RuntimeError("Invalid input type provided.") diff --git a/scripts/utils/gd_api.py b/scripts/utils/gd_api.py index 56a5450..7be9898 100644 --- a/scripts/utils/gd_api.py +++ b/scripts/utils/gd_api.py @@ -1,10 +1,12 @@ # (C) 2025 GoodData Corporation import json +import logging from typing import Any, TypeAlias import requests -from utils.logger import logger # type: ignore[import] + +logger = logging.getLogger(__name__) API_VERSION = "v1" BEARER_TKN_PREFIX = "Bearer" @@ -75,6 +77,9 @@ def _resolve_return_code( response, ok_code: int, url, method, not_found_code: int | None = None ) -> MaybeResponse: """Resolves the return code of the response.""" + # TODO: this can be simplified, it would be more transparent to evaluate the + # requests.Response.status_code directly in each particular use case rather than + # checking if a "MaybeResponse" type is None or not. if response.status_code == ok_code: logger.debug(f"{method} to {url} succeeded") return response diff --git a/scripts/utils/logger.py b/scripts/utils/logger.py index 576fa3c..fe62e12 100644 --- a/scripts/utils/logger.py +++ b/scripts/utils/logger.py @@ -1,22 +1,56 @@ import logging +import os +import sys +from datetime import datetime +BASE_FORMAT = "%(asctime)s %(script)s [%(levelname)s] %(message)s" +FORMATS = { + logging.WARNING: f"\033[33m{BASE_FORMAT}\033[00m", + logging.ERROR: f"\033[31m{BASE_FORMAT}\033[00m", +} -class LevelFormatter(logging.Formatter): - BASE_FORMAT = "%(asctime)s [%(levelname)s] %(message)s" - FORMATS = { - logging.WARNING: "\033[33m%(asctime)s [%(levelname)s] %(message)s\033[00m", - logging.ERROR: "\033[31m%(asctime)s [%(levelname)s] %(message)s\033[00m", - } +class LevelFormatter(logging.Formatter): def format(self, record): - fmt = self.FORMATS.get(record.levelno, self.BASE_FORMAT) + fmt = FORMATS.get(record.levelno, BASE_FORMAT) formatter = logging.Formatter(fmt) return formatter.format(record) -logger = logging.getLogger(__name__) -logging.getLogger(__name__).setLevel(logging.INFO) -logger.setLevel(logging.INFO) -ch = logging.StreamHandler() -ch.setFormatter(LevelFormatter()) -logger.addHandler(ch) +class LogHandler(logging.Handler): + def __init__(self, script_name: str) -> None: + super().__init__() + self.script_name: str = os.path.splitext(os.path.basename(script_name))[0] + self.file_handler: logging.FileHandler | None = None + + self.stream_handler: logging.StreamHandler = logging.StreamHandler() + self.stream_handler.setFormatter(LevelFormatter()) + + def emit(self, record: logging.LogRecord) -> None: + # Top level script name + record.script = self.script_name + self.stream_handler.emit(record) + + # Save Warnings and Errors to a file + if record.levelno >= logging.WARNING: + if self.file_handler is None: + date_str = datetime.now().strftime("%Y-%m-%d") + log_filename = f"{self.script_name}_{date_str}.log" + self.file_handler = logging.FileHandler(log_filename, encoding="utf-8") + self.file_handler.setFormatter(logging.Formatter(BASE_FORMAT)) + self.file_handler.emit(record) + + +def get_top_level_script() -> str: + """Returns the name of the top-level script.""" + if hasattr(sys, "argv") and sys.argv and sys.argv[0]: + return sys.argv[0] + return "__main__" + + +def setup_logging() -> None: + """Sets up logging configuration for the root logger.""" + root_logger = logging.getLogger() + root_logger.setLevel(logging.INFO) + root_logger.handlers.clear() + root_logger.addHandler(LogHandler(get_top_level_script())) diff --git a/tests/test_utils/test_backup_utils/test_input_loader.py b/tests/test_utils/test_backup_utils/test_input_loader.py index a520e88..163f4dd 100644 --- a/tests/test_utils/test_backup_utils/test_input_loader.py +++ b/tests/test_utils/test_backup_utils/test_input_loader.py @@ -26,8 +26,6 @@ api_token="fake_token", ) -# MOCK_INPUT_LOADER = backup.InputLoader(MOCK_GD_API, 100) - @pytest.fixture def input_loader(): @@ -57,7 +55,7 @@ def test_log_paging_progress_logs_info(mocker): links=Links(self="self", next="next"), ) - mock_logger = mocker.patch("scripts.utils.logger.logger.info") + mock_logger = mocker.patch("scripts.utils.backup_utils.input_loader.logger.info") InputLoader.log_paging_progress(response) mock_logger.assert_called_once @@ -69,7 +67,7 @@ def test_log_paging_progress_no_page(mocker): links=Links(self="self", next="next"), ) - mock_logger = mocker.patch("scripts.utils.logger.logger.info") + mock_logger = mocker.patch("scripts.utils.backup_utils.input_loader.logger.info") InputLoader.log_paging_progress(response) assert mock_logger.call_count == 0 @@ -135,17 +133,18 @@ def fake_paginate(url): assert set(result) == {"c1", "c2"} -def test_get_workspaces_to_backup_empty_org(input_loader, monkeypatch): +def test_get_workspaces_to_backup_empty_org(input_loader, monkeypatch, caplog): monkeypatch.setattr( input_loader, - "get_all_workspaces", - lambda: [], + "_paginate", + lambda _: [], ) - with pytest.raises(RuntimeError, match="No workspaces found in the organization."): + with caplog.at_level("WARNING"): input_loader.get_ids_to_backup( "entire-organization", "some-csv-file.csv", ) + assert "No workspaces found in the organization." in caplog.text def test_get_workspaces_to_backup_wrong_input_type(input_loader):