diff --git a/requirements.txt b/requirements.txt index 9cc3c26..dd192b2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,9 @@ -boto3==1.38.45 -gooddata_sdk==1.39.0 -requests==2.32.0 -pydantic==2.11.3 \ No newline at end of file +# GoodData Python SDK packages +gooddata_sdk>=1.51 +gooddata-pipelines>=1.51 + +# Other dependencies +# TODO: remove after full transition to GoodData SDK packages +requests +boto3 +pydantic \ No newline at end of file diff --git a/scripts/user_mgmt.py b/scripts/user_mgmt.py index cc2cf34..d53567b 100644 --- a/scripts/user_mgmt.py +++ b/scripts/user_mgmt.py @@ -4,26 +4,17 @@ import logging import os import re -import sys -from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Optional -import gooddata_sdk as gd_sdk -from gooddata_api_client.exceptions import NotFoundException - -UG_REGEX = r"^(?!\.)[.A-Za-z0-9_-]{1,255}$" - -PROFILES_FILE = "profiles.yaml" -PROFILES_DIRECTORY = ".gooddata" -PROFILES_FILE_PATH = Path.home() / PROFILES_DIRECTORY / PROFILES_FILE -LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s" +from gooddata_pipelines import UserIncrementalLoad, UserProvisioner +from gooddata_sdk.utils import PROFILES_FILE_PATH +from utils.logger import setup_logging # type: ignore[import] +from utils.utils import create_provisioner # type: ignore[import] +setup_logging() logger = logging.getLogger(__name__) -handler = logging.StreamHandler(sys.stdout) -handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT)) -logger.addHandler(handler) -logger.setLevel(logging.INFO) + +UG_REGEX = r"^(?!\.)[.A-Za-z0-9_-]{1,255}$" def create_parser() -> argparse.ArgumentParser: @@ -79,209 +70,41 @@ def create_parser() -> argparse.ArgumentParser: return parser -class GoodDataRestApiError(Exception): - """Wrapper for errors occurring from interaction with GD REST API.""" - - -def optional(string: str) -> Optional[str]: - """ - Ensures conversion of empty string to None. - - CSV reader parses empty fields as empty strings. - - Returns string or None. - """ - return string if string else None - - -@dataclass -class GDUserTarget: - user_id: str - firstname: Optional[str] - lastname: Optional[str] - email: Optional[str] - auth_id: Optional[str] - user_groups: list[str] - is_active: bool = field(compare=False) - - @classmethod - def from_csv_row( - cls, row: list[Any], user_group_delim: str = "," - ) -> "GDUserTarget": - """Creates GDUserTarget from csv row.""" - user_id, firstname, lastname, email, auth_id, user_groups, is_active = row - user_groups_list = user_groups.split(user_group_delim) if user_groups else [] - return GDUserTarget( - user_id=user_id, - firstname=optional(firstname), - lastname=optional(lastname), - email=optional(email), - auth_id=optional(auth_id), - user_groups=user_groups_list, - is_active=str(is_active).lower() == "true", - ) +def read_users_from_csv( + path_to_csv: str, row_delimiter: str, quotechar: str, user_group_delimiter: str +) -> list[UserIncrementalLoad]: + """Reads users from csv file.""" - @classmethod - def from_sdk_obj(cls, obj: gd_sdk.CatalogUser) -> "GDUserTarget": - """Creates GDUserTarget from CatalogUser SDK object.""" - return GDUserTarget( - user_id=obj.id, - firstname=obj.attributes.firstname if obj.attributes else None, - lastname=obj.attributes.lastname if obj.attributes else None, - email=obj.attributes.email if obj.attributes else None, - auth_id=obj.attributes.authentication_id if obj.attributes else None, - user_groups=[ug.id for ug in obj.user_groups], - is_active=True, - ) + users: list[UserIncrementalLoad] = [] - def to_sdk_obj(self) -> gd_sdk.CatalogUser: - """Converts GDUserTarget to CatalogUser SDK object.""" - return gd_sdk.CatalogUser.init( - user_id=self.user_id, - firstname=self.firstname, - lastname=self.lastname, - email=self.email, - authentication_id=self.auth_id, - user_group_ids=self.user_groups, + with open(path_to_csv, "r") as f: + reader = csv.DictReader( + f, delimiter=row_delimiter, quotechar=quotechar, skipinitialspace=True ) - - -class UserManager: - def __init__(self, sdk: gd_sdk.GoodDataSdk): - self._sdk = sdk - - def _try_get_user(self, user: GDUserTarget) -> Optional[GDUserTarget]: - try: - user_sdk_obj = self._sdk.catalog_user.get_user(user.user_id) - return GDUserTarget.from_sdk_obj(user_sdk_obj) - except NotFoundException: - return None - - def _get_or_create_user_groups(self, groups: list[str]): - """Ensures that all user groups exist in the project.""" - # TODO - Can be optimized - preloading all user groups and checking on the go - for group in groups: + for row in reader: try: - self._sdk.catalog_user.get_user_group(group) - except NotFoundException: - logger.info(f'UserGroup "{group}" doesn\'t exist - creating...') - self._sdk.catalog_user.create_or_update_user_group( - gd_sdk.CatalogUserGroup.init( - user_group_id=group, user_group_name=group - ) + user_id = row["user_id"] + firstname = row["firstname"] + lastname = row["lastname"] + email = row["email"] + auth_id = row["auth_id"] + user_groups = row["user_groups"].split(user_group_delimiter) + is_active = row["is_active"] == "True" + + user = UserIncrementalLoad( + user_id=user_id, + firstname=firstname, + lastname=lastname, + email=email, + auth_id=auth_id, + user_groups=user_groups, + is_active=is_active, ) - def _create_or_update_user(self, user: GDUserTarget): - """Creates or updates user in the project.""" - upstream_user = self._try_get_user(user) - if user == upstream_user: - logger.info(f'No action for user "{user.user_id}"') - return - if not upstream_user: - logger.info(f'Creating user "{user.user_id}"...') - else: - logger.info(f'Updating user "{user.user_id}"...') - - self._get_or_create_user_groups(user.user_groups) - self._sdk.catalog_user.create_or_update_user(user.to_sdk_obj()) - - def _delete_user(self, user: GDUserTarget): - """Deletes user from the project.""" - try: - self._sdk.catalog_user.get_user(user.user_id) - except NotFoundException: - logger.info(f'No action for user "{user.user_id}"') - return - logger.info(f'Deleting user "{user.user_id}"') - self._sdk.catalog_user.delete_user(user.user_id) - - def manage_user(self, user: GDUserTarget): - """Manages user based on the provided GDUserTarget.""" - if user.is_active: - self._create_or_update_user(user) - else: - self._delete_user(user) - - def manage_users(self, users: list[GDUserTarget]): - """Manages multiple users based on the provided GDUserTargets.""" - logger.info(f"Starting user management run of {len(users)} users...") - for user in users: - try: - self.manage_user(user) - except GoodDataRestApiError as e: - logger.error(f"API request for user failed: {e}") - except Exception as e: - logger.error(f"Something went wrong for {user.user_id}. Error: {e}") - logger.info("User management run finished.") - - -# TODO - simplify after complete switch to SDK -def create_clients(args: argparse.Namespace) -> gd_sdk.GoodDataSdk: - """Creates GoodData SDK client.""" - gdc_auth_token = os.environ.get("GDC_AUTH_TOKEN") - gdc_hostname = os.environ.get("GDC_HOSTNAME") - - if gdc_hostname and gdc_auth_token: - logger.info("Using GDC_HOSTNAME and GDC_AUTH_TOKEN envvars.") - sdk = gd_sdk.GoodDataSdk.create(gdc_hostname, gdc_auth_token) - return sdk - - profile_config, profile = args.profile_config, args.profile - if os.path.exists(profile_config): - logger.info(f"Using GoodData profile {profile} sourced from {profile_config}.") - sdk = gd_sdk.GoodDataSdk.create_from_profile(profile, profile_config) - return sdk - - raise RuntimeError( - "No GoodData credentials provided. Please export required ENVVARS " - "(GDC_HOSTNAME, GDC_AUTH_TOKEN) or provide path to GD profile config." - ) - - -def csv_row_is_valid(row: list[Any]) -> bool: - """Validates csv row.""" - try: - user_id, firstname, lastname, email, auth_id, user_groups, is_active = row - except Exception as e: - logger.error( - "Unable to parse csv row. " - "Most probably an incorrect amount of values was defined. " - f'Skipping following row: "{row}". Error: "{e}".' - ) - return False - - if not user_id: - logger.error( - f'user_id field seems to be empty. Skipping following row: "{row}".' - ) - return False - - if not is_active: - logger.error( - f'is_active field seems to be empty. Skipping following row: "{row}".' - ) - return False - - return True - - -def read_users_from_csv(args: argparse.Namespace) -> list[GDUserTarget]: - """Reads users from csv file.""" - # TODO - handling of csv files with and without headers - users: list[GDUserTarget] = [] - with open(args.user_csv, "r") as f: - reader = csv.reader( - f, delimiter=args.delimiter, quotechar=args.quotechar, skipinitialspace=True - ) - next(reader) # Skip header - for row in reader: - if not csv_row_is_valid(row): - continue - try: - user = GDUserTarget.from_csv_row(row, args.ug_delimiter) except Exception as e: logger.error(f'Unable to load following row: "{row}". Error: "{e}"') continue + users.append(user) return users @@ -305,20 +128,22 @@ def validate_args(args: argparse.Namespace) -> None: raise RuntimeError("The quotechar argument must be exactly one character long.") -def user_mgmt(args): +def user_mgmt(args: argparse.Namespace) -> None: """Main function for user management.""" if args.verbose: logger.setLevel(logging.DEBUG) validate_args(args) - users = read_users_from_csv(args) + users = read_users_from_csv( + args.user_csv, args.delimiter, args.quotechar, args.ug_delimiter + ) - sdk = create_clients(args) + provisioner = create_provisioner(UserProvisioner, args.profile_config, args.profile) - user_manager = UserManager(sdk) + provisioner.logger.subscribe(logger) - user_manager.manage_users(users) + provisioner.incremental_load(users) if __name__ == "__main__": diff --git a/scripts/utils/utils.py b/scripts/utils/utils.py index 71cd708..6774ef0 100644 --- a/scripts/utils/utils.py +++ b/scripts/utils/utils.py @@ -2,6 +2,14 @@ """This module contains general utility functions.""" import csv +import logging +import os +from pathlib import Path +from typing import Type + +from gooddata_pipelines.provisioning.provisioning import Provisioning + +logger = logging.getLogger(__name__) def read_csv_file_to_dict(file_path: str) -> list[dict[str, str]]: @@ -15,3 +23,26 @@ def read_csv_file_to_dict(file_path: str) -> list[dict[str, str]]: """ with open(file_path, "r", encoding="utf-8") as file: return list(csv.DictReader(file)) + + +def create_provisioner( + ProvisionerType: Type[Provisioning], profile_config: Path, profile: str +) -> Provisioning: + """Creates GoodData SDK client.""" + gdc_auth_token = os.environ.get("GDC_AUTH_TOKEN") + gdc_hostname = os.environ.get("GDC_HOSTNAME") + + if gdc_hostname and gdc_auth_token: + logger.info("Using GDC_HOSTNAME and GDC_AUTH_TOKEN envvars.") + return ProvisionerType.create(host=gdc_hostname, token=gdc_auth_token) + + if os.path.exists(profile_config): + logger.info(f"Using GoodData profile {profile} sourced from {profile_config}.") + return ProvisionerType.create_from_profile( + profile=profile, profiles_path=profile_config + ) + + raise RuntimeError( + "No GoodData credentials provided. Please export required ENVVARS " + "(GDC_HOSTNAME, GDC_AUTH_TOKEN) or provide path to GD profile config." + ) diff --git a/tests/test_user_mgmt.py b/tests/test_user_mgmt.py index 4d318ee..3637ed7 100644 --- a/tests/test_user_mgmt.py +++ b/tests/test_user_mgmt.py @@ -1,62 +1,19 @@ # (C) 2025 GoodData Corporation +import os +import sys + +sys.path.insert( + 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../scripts")) +) + + import argparse -from dataclasses import dataclass -from typing import Any, Optional from unittest import mock -import gooddata_sdk as gd_sdk import pytest -from gooddata_api_client.exceptions import NotFoundException from scripts import user_mgmt -TEST_CSV_PATH = "tests/data/user_mgmt/input.csv" - - -@dataclass -class MockUser: - id: str - firstname: Optional[str] - lastname: Optional[str] - email: Optional[str] - authenticationId: Optional[str] - user_groups: list[str] - - def to_sdk(self): - return gd_sdk.CatalogUser.init( - user_id=self.id, - firstname=self.firstname, - lastname=self.lastname, - email=self.email, - authentication_id=self.authenticationId, - user_group_ids=self.user_groups, - ) - - def to_json(self): - attrs = {} - if self.authenticationId: - attrs["authenticationId"] = self.authenticationId - if self.firstname: - attrs["firstname"] = self.firstname - if self.lastname: - attrs["lastname"] = self.lastname - if self.email: - attrs["email"] = self.email - - data = { - "id": self.id, - "type": "user", - "attributes": attrs, - } - - if not self.user_groups: - return data - - relsdata = [{"id": group, "type": "userGroup"} for group in self.user_groups] - if relsdata: - data["relationships"] = {"userGroups": {"data": relsdata}} - return data - @mock.patch("os.path.exists") def test_conflicting_delimiters_raises_error(path_exists): @@ -66,137 +23,3 @@ def test_conflicting_delimiters_raises_error(path_exists): ) with pytest.raises(RuntimeError): user_mgmt.validate_args(args) - - -def test_user_obj_from_sdk(): - user_input = MockUser("some.user", "some", "user", "some@email.com", "auth", ["ug"]) - excepted = user_mgmt.GDUserTarget( - "some.user", "some", "user", "some@email.com", "auth", ["ug"], True - ) - user = user_mgmt.GDUserTarget.from_sdk_obj(user_input.to_sdk()) - assert excepted == user - - -def test_user_obj_from_sdk_no_ugs(): - user_input = MockUser("some.user", "some", "user", "some@email.com", "auth", []) - excepted = user_mgmt.GDUserTarget( - "some.user", "some", "user", "some@email.com", "auth", [], True - ) - user = user_mgmt.GDUserTarget.from_sdk_obj(user_input.to_sdk()) - assert excepted == user - - -def test_user_obj_to_sdk(): - user_input = MockUser("some.user", "some", "user", "some@email.com", "auth", ["ug"]) - user = user_mgmt.GDUserTarget( - "some.user", "some", "user", "some@email.com", "auth", ["ug"], True - ) - excepted = user_input.to_sdk() - assert excepted == user.to_sdk_obj() - - -def test_user_obj_to_sdk_no_ugs(): - user_input = MockUser("some.user", "some", "user", "some@email.com", "auth", []) - user = user_mgmt.GDUserTarget( - "some.user", "some", "user", "some@email.com", "auth", [], True - ) - excepted = user_input.to_sdk() - assert excepted == user.to_sdk_obj() - - -class MockResponse: - def __init__(self, status_code, json_response: dict[str, Any] = {}, text: str = ""): - self.status_code = status_code - self.json_response = json_response - self.text = text - - def json(self): - return self.json_response - - -UPSTREAM_USERS = { - "jozef.mrkva": MockUser( - "jozef.mrkva", "jozef", "mrkva", "jozef.mrkva@test.com", "auth_id_1", [] - ), - "kristian.kalerab": MockUser( - "kristian.kalerab", - "kristian", - "kalerab", - "kristian.kalerab@test.com", - "auth_id_5", - [], - ), - "richard.cvikla": MockUser( - "richard.cvikla", "richard", "cvikla", None, "auth_id_6", [] - ), - "adam.avokado": MockUser("adam.avokado", None, None, None, "auth_id_7", []), -} - -UPSTREAM_UG_ID = "ug_1" -EXPECTED_NEW_UG_OBJ = gd_sdk.CatalogUserGroup.init("ug_2", "ug_2") -EXPECTED_GET_IDS = {"jozef.mrkva", "kristian.kalerab", "peter.pertzlen", "zoltan.zeler"} -EXPECTED_CREATE_OR_UPDATE_IDS = {"peter.pertzlen", "zoltan.zeler", "kristian.kalerab"} - - -def prepare_sdk(): - def mock_get_user(user_id): - if user_id not in UPSTREAM_USERS: - raise NotFoundException - return UPSTREAM_USERS[user_id].to_sdk() - - def mock_get_user_group(ug_id): - if ug_id != UPSTREAM_UG_ID: - raise NotFoundException - return - - sdk = mock.Mock() - sdk.catalog_user.get_user.side_effect = mock_get_user - sdk.catalog_user.get_user_group.side_effect = mock_get_user_group - return sdk - - -""" -jozef - No change; user exists -bartolomej - no change; user doesnt exist -peter - create (2 ugs); 1 ug exists, 1 doesnt -zoltan - create (1 ug); ug exists -kristian - update -richard - delete (diff fields than in upstream) -adam - delete (same fields as in upstream) -""" - - -@mock.patch("scripts.user_mgmt.create_clients") -def test_user_mgmt_e2e(create_client): - sdk = prepare_sdk() - create_client.return_value = sdk - - args = argparse.Namespace( - user_csv=TEST_CSV_PATH, - delimiter=",", - ug_delimiter="|", - quotechar='"', - verbose=False, - ) - - user_mgmt.user_mgmt(args) - - sdk.catalog_user.get_user.assert_has_calls( - [mock.call(id) for id in EXPECTED_GET_IDS], - any_order=True, - ) - - created_or_updated = { - call[0][0].id for call in sdk.catalog_user.create_or_update_user.call_args_list - } - assert created_or_updated == EXPECTED_CREATE_OR_UPDATE_IDS - - sdk.catalog_user.delete_user.assert_has_calls( - [mock.call("richard.cvikla"), mock.call("adam.avokado")] - ) - sdk.catalog_user.get_user_group.assert_has_calls( - [mock.call("ug_1"), mock.call("ug_2")] - ) - sdk.catalog_user.create_or_update_user_group.assert_called_once_with( - EXPECTED_NEW_UG_OBJ - )