Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ cover/
**/local_backups/**
**/*.yaml
**/*.csv
**.log
**.env

# Include sample and test config files
!tests/data/backup/test_local_conf.yaml
31 changes: 22 additions & 9 deletions scripts/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import argparse
import datetime
import json
import logging
import os
import shutil
import tempfile
Expand All @@ -21,7 +22,10 @@
GDApi,
GoodDataRestApiError,
)
from utils.logger import logger # type: ignore[import]
from utils.logger import setup_logging # type: ignore[import]

setup_logging()
logger = logging.getLogger("backup")

TIMESTAMP_SDK_FOLDER = (
str(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
Expand Down Expand Up @@ -301,10 +305,7 @@ def get_workspace_export(

user_data_filters = get_user_data_filters(api, ws_id)
if not user_data_filters:
logger.error(
f"Skipping backup of {ws_id} - user data filters returned None."
)
logger.error(f"Check if {ws_id} exists and the API is functional")
logger.error(f"Skipping backup of {ws_id} - check if workspace exists.")
continue

try:
Expand All @@ -320,8 +321,7 @@ def get_workspace_export(

if not exported:
raise RuntimeError(
"None of the workspaces were exported."
"Check source file and their existence."
"None of the workspaces were exported. Check source file and their existence."
)


Expand Down Expand Up @@ -393,11 +393,18 @@ def main(args: argparse.Namespace) -> None:
storage_class: Type[BackupStorage] = get_storage(conf.storage_type)
storage: BackupStorage = storage_class(conf)

# TODO: if storage set to S3, check that valid connection can be established
# currently the script would gather the exports and only then fail to upload them

loader = InputLoader(api, conf.api_page_size)
workspaces_to_export: list[str] = loader.get_ids_to_backup(
args.input_type, args.ws_csv
)

if not workspaces_to_export:
logger.error("No workspaces to export. Check the input file or the input type.")
return

with tempfile.TemporaryDirectory() as tmpdir:
get_workspace_export(sdk, api, tmpdir, org_id, workspaces_to_export)

Expand All @@ -409,5 +416,11 @@ def main(args: argparse.Namespace) -> None:
if __name__ == "__main__":
    # Script entry point: parse the CLI arguments, validate them and run the
    # backup exactly once.
    parser: argparse.ArgumentParser = create_parser()
    args: argparse.Namespace = parser.parse_args()

    try:
        validate_args(args)
        main(args)
    except Exception as e:
        logger.error(f"Backup failed: {e}")
        # Exit with a non-zero status so shells, schedulers and CI jobs can
        # tell a failed run from a successful one.
        raise SystemExit(1) from e
    else:
        # Only reached when neither validation nor the backup raised.
        logger.info("Backup completed.")
19 changes: 11 additions & 8 deletions scripts/utils/backup_utils/input_loader.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# (C) 2025 GoodData Corporation
import csv
import logging
from dataclasses import dataclass
from typing import Iterator

Expand All @@ -9,12 +10,13 @@
GoodDataRestApiError,
MaybeResponse,
)
from utils.logger import logger # type: ignore[import]
from utils.models.workspace_response import ( # type: ignore[import]
Workspace,
WorkspaceResponse,
)

logger = logging.getLogger(__name__)


class InputLoader:
"""Class to handle loading and parsing the input data."""
Expand Down Expand Up @@ -145,6 +147,11 @@ def get_hierarchy(self, parent_id: str) -> list[str]:
for subparent in sub_parents:
all_children += self.get_hierarchy(subparent)

if not all_children:
logger.warning(
f"No child workspaces found for parent workspace ID: {parent_id}"
)

return all_children

def get_all_workspaces(self) -> list[str]:
Expand All @@ -163,6 +170,9 @@ def get_all_workspaces(self) -> list[str]:
for result in results:
all_workspaces.extend(result.workspace_ids)

if not all_workspaces:
logger.warning("No workspaces found in the organization.")

return all_workspaces

def get_ids_to_backup(self, input_type: str, path_to_csv: str) -> list[str]:
Expand All @@ -178,18 +188,11 @@ def get_ids_to_backup(self, input_type: str, path_to_csv: str) -> list[str]:
for parent in list_of_parents:
list_of_children.extend(self.get_hierarchy(parent))

if not list_of_children:
raise RuntimeError(
"No child workspaces found for the provided list of parents."
)

# Include the parent workspaces in the backup
return list_of_parents + list_of_children

if input_type == "entire-organization":
list_of_workspaces = self.get_all_workspaces()
if not list_of_workspaces:
raise RuntimeError("No workspaces found in the organization.")
return list_of_workspaces

raise RuntimeError("Invalid input type provided.")
7 changes: 6 additions & 1 deletion scripts/utils/gd_api.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# (C) 2025 GoodData Corporation

import json
import logging
from typing import Any, TypeAlias

import requests
from utils.logger import logger # type: ignore[import]

logger = logging.getLogger(__name__)

API_VERSION = "v1"
BEARER_TKN_PREFIX = "Bearer"
Expand Down Expand Up @@ -75,6 +77,9 @@ def _resolve_return_code(
response, ok_code: int, url, method, not_found_code: int | None = None
) -> MaybeResponse:
"""Resolves the return code of the response."""
# TODO: this can be simplified, it would be more transparent to evaluate the
# requests.Response.status_code directly in each particular use case rather than
# checking if a "MaybeResponse" type is None or not.
if response.status_code == ok_code:
logger.debug(f"{method} to {url} succeeded")
return response
Expand Down
60 changes: 47 additions & 13 deletions scripts/utils/logger.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,56 @@
import logging
import os
import sys
from datetime import datetime

BASE_FORMAT = "%(asctime)s %(script)s [%(levelname)s] %(message)s"
FORMATS = {
logging.WARNING: f"\033[33m{BASE_FORMAT}\033[00m",
logging.ERROR: f"\033[31m{BASE_FORMAT}\033[00m",
}

class LevelFormatter(logging.Formatter):
    """Formatter that selects the log line layout from the record's level.

    Levels listed in the module-level ``FORMATS`` mapping (WARNING and ERROR,
    each wrapped in ANSI color codes) use their own layout; every other level
    falls back to the module-level ``BASE_FORMAT``.
    """

    def __init__(self, *args, **kwargs) -> None:
        """Initialize the formatter; arguments are passed to ``logging.Formatter``."""
        super().__init__(*args, **kwargs)
        # Build the delegate formatters once instead of once per record.
        self._default_formatter = logging.Formatter(BASE_FORMAT)
        self._level_formatters = {
            levelno: logging.Formatter(fmt) for levelno, fmt in FORMATS.items()
        }

    def format(self, record: logging.LogRecord) -> str:
        """Return ``record`` rendered with the layout registered for its level."""
        formatter = self._level_formatters.get(record.levelno, self._default_formatter)
        return formatter.format(record)


logger = logging.getLogger(__name__)
logging.getLogger(__name__).setLevel(logging.INFO)
logger.setLevel(logging.INFO)
ch = logging.StreamHandler()
ch.setFormatter(LevelFormatter())
logger.addHandler(ch)
class LogHandler(logging.Handler):
    """Handler that echoes every record to a stream and saves warnings to a file.

    Each record is tagged with the top-level script name (available to formats
    as ``%(script)s``) and passed to a ``StreamHandler`` formatted by
    ``LevelFormatter``. Records at WARNING level or above are additionally
    written to ``<script_name>_<YYYY-MM-DD>.log``. That file is opened lazily
    when the first such record arrives, and its name uses the date of that
    moment.
    """

    def __init__(self, script_name: str) -> None:
        """Create the handler.

        Args:
            script_name: Name or path of the top-level script. Only the base
                name without its extension is kept.
        """
        super().__init__()
        self.script_name: str = os.path.splitext(os.path.basename(script_name))[0]
        # Opened on demand so that runs without warnings leave no log file.
        self.file_handler: logging.FileHandler | None = None

        self.stream_handler: logging.StreamHandler = logging.StreamHandler()
        self.stream_handler.setFormatter(LevelFormatter())

    def emit(self, record: logging.LogRecord) -> None:
        """Send ``record`` to the stream and, for WARNING and above, to the file.

        Failures (for example when the log file cannot be opened) are routed
        through ``handleError`` so that a logging problem does not raise out
        of the caller's ``logger.warning(...)`` / ``logger.error(...)`` call.
        """
        try:
            # Top level script name
            record.script = self.script_name
            self.stream_handler.emit(record)

            # Save Warnings and Errors to a file
            if record.levelno >= logging.WARNING:
                self._get_file_handler().emit(record)
        except Exception:
            self.handleError(record)

    def _get_file_handler(self) -> logging.FileHandler:
        """Return the file handler, creating it on first use."""
        if self.file_handler is None:
            date_str = datetime.now().strftime("%Y-%m-%d")
            log_filename = f"{self.script_name}_{date_str}.log"
            file_handler = logging.FileHandler(log_filename, encoding="utf-8")
            file_handler.setFormatter(logging.Formatter(BASE_FORMAT))
            self.file_handler = file_handler
        return self.file_handler

    def close(self) -> None:
        """Close the wrapped handlers, releasing the log file if it was opened."""
        try:
            if self.file_handler is not None:
                self.file_handler.close()
                # A later WARNING+ record re-opens the file on demand.
                self.file_handler = None
            self.stream_handler.close()
        finally:
            super().close()


def get_top_level_script() -> str:
    """Return the name of the top-level script.

    Uses the first entry of ``sys.argv``; falls back to ``"__main__"`` when
    ``sys.argv`` is missing, empty, or starts with an empty value.
    """
    argv = getattr(sys, "argv", None)
    if not argv:
        return "__main__"
    return argv[0] or "__main__"


def setup_logging() -> None:
    """Configure the root logger.

    Sets the root level to INFO, discards any handlers already attached to the
    root logger, and installs a single ``LogHandler`` named after the
    top-level script.
    """
    root = logging.getLogger()
    root.setLevel(logging.INFO)

    # Empty the handler list in place, then attach the one project handler.
    root.handlers[:] = []
    handler = LogHandler(get_top_level_script())
    root.addHandler(handler)
15 changes: 7 additions & 8 deletions tests/test_utils/test_backup_utils/test_input_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
api_token="fake_token",
)

# MOCK_INPUT_LOADER = backup.InputLoader(MOCK_GD_API, 100)


@pytest.fixture
def input_loader():
Expand Down Expand Up @@ -57,7 +55,7 @@ def test_log_paging_progress_logs_info(mocker):
links=Links(self="self", next="next"),
)

mock_logger = mocker.patch("scripts.utils.logger.logger.info")
mock_logger = mocker.patch("scripts.utils.backup_utils.input_loader.logger.info")
InputLoader.log_paging_progress(response)
mock_logger.assert_called_once

Expand All @@ -69,7 +67,7 @@ def test_log_paging_progress_no_page(mocker):
links=Links(self="self", next="next"),
)

mock_logger = mocker.patch("scripts.utils.logger.logger.info")
mock_logger = mocker.patch("scripts.utils.backup_utils.input_loader.logger.info")
InputLoader.log_paging_progress(response)
assert mock_logger.call_count == 0

Expand Down Expand Up @@ -135,17 +133,18 @@ def fake_paginate(url):
assert set(result) == {"c1", "c2"}


def test_get_workspaces_to_backup_empty_org(input_loader, monkeypatch, caplog):
    """An organization without workspaces is reported through a warning log."""
    # Stub pagination so the loader receives no result pages at all.
    monkeypatch.setattr(input_loader, "_paginate", lambda _: [])

    with caplog.at_level("WARNING"):
        input_loader.get_ids_to_backup("entire-organization", "some-csv-file.csv")

    assert "No workspaces found in the organization." in caplog.text


def test_get_workspaces_to_backup_wrong_input_type(input_loader):
Expand Down