From 3c53adfc659d6c8c5903f5d1dbc7495b8f33cb6b Mon Sep 17 00:00:00 2001 From: huanphan-tma Date: Mon, 31 Jul 2023 18:56:34 +0700 Subject: [PATCH 01/31] =?UTF-8?q?ref=20[Bug][NII=20Redmine#37211]=E6=A9=9F?= =?UTF-8?q?=E9=96=A2=E3=82=B9=E3=83=88=E3=83=AC=E3=83=BC=E3=82=B8(?= =?UTF-8?q?=E3=82=A2=E3=83=89=E3=82=AA=E3=83=B3=E6=96=B9=E5=BC=8F)?= =?UTF-8?q?=E3=81=AB=E3=81=8A=E3=81=91=E3=82=8B=E3=82=A8=E3=82=AF=E3=82=B9?= =?UTF-8?q?=E3=83=9D=E3=83=BC=E3=83=88=E3=83=BB=E3=83=AA=E3=82=B9=E3=83=88?= =?UTF-8?q?=E3=82=A2=E5=87=A6=E7=90=86=E3=81=AE=E6=AE=8B=E8=AA=B2=E9=A1=8C?= =?UTF-8?q?=E3=81=AB=E3=81=A4=E3=81=84=E3=81=A6:=20Implement=20fix=20for?= =?UTF-8?q?=20export=20and=20restore=20add-on=20storages?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- addons.json | 4 +- admin/base/schemas/file-info-schema.json | 55 +++-- .../export_data/utils.py | 50 ++++- .../export_data/views/export.py | 2 +- .../export_data/views/management.py | 17 +- .../export_data/views/restore.py | 146 ++++++------ admin/rdm_custom_storage_location/utils.py | 27 ++- admin/rdm_custom_storage_location/views.py | 3 +- .../export_data/test_utils.py | 131 ++++++++++- .../export_data/views/test_management.py | 4 +- .../export_data/views/test_restore.py | 68 ++---- .../rdm_custom_storage_location/test_utils.py | 81 ++++++- osf/models/export_data.py | 209 ++++++++++++++---- osf/models/export_data_restore.py | 182 +++++++++++---- osf_tests/factories.py | 61 ++++- osf_tests/test_export_data.py | 92 ++++++++ osf_tests/test_export_data_restore.py | 83 ++++++- 17 files changed, 966 insertions(+), 249 deletions(-) diff --git a/addons.json b/addons.json index 2f99026fd96..fd9270b3983 100644 --- a/addons.json +++ b/addons.json @@ -168,9 +168,7 @@ "dropboxbusiness", "s3compatinstitutions", "ociinstitutions", - "s3", - "s3compat", - "owncloud" + "onedrivebusiness" ], "institutional_storage_bulk_mount_method": [ "box", diff --git a/admin/base/schemas/file-info-schema.json b/admin/base/schemas/file-info-schema.json index 4801f810655..8882b58aee7 100644 --- a/admin/base/schemas/file-info-schema.json +++ b/admin/base/schemas/file-info-schema.json @@ -135,15 +135,6 @@ ] } }, - "required": [ - "host", - "object", - "folder", - "address", - "service", - "version", - "provider" - ], "additionalProperties": true }, "project": { @@ -244,15 +235,6 @@ ] } }, - "required": [ - "host", - "object", - "folder", - "address", - "service", - "version", - "provider" - ], "additionalProperties": true }, "metadata": { @@ -318,6 +300,12 @@ "null" ] }, + "quickXorHash": { + "type": [ + "string", + "null" + ] + }, "size": { "type": [ "string", @@ -366,10 +354,6 @@ "path", "etag", "materialized", - "md5", - "sha1", - "sha256", - "sha512", "size", "sizeInt", "extra", @@ -378,6 +362,33 @@ "modified", "modified_utc" ], + "anyOf": [ + { + "required": [ + "md5" + ] + }, + { + "required": [ + "sha1" + ] + }, + { + "required": [ + "sha256" + ] + }, + { + "required": [ + "sha512" + ] + }, + { + "required": [ + "quickXorHash" + ] + } + ], "additionalProperties": true } }, diff --git a/admin/rdm_custom_storage_location/export_data/utils.py b/admin/rdm_custom_storage_location/export_data/utils.py index f8ef02e981c..6203806d556 100644 --- a/admin/rdm_custom_storage_location/export_data/utils.py +++ b/admin/rdm_custom_storage_location/export_data/utils.py @@ -7,6 +7,7 @@ import jsonschema import requests +from django.db import transaction from django.db.models import Q from rest_framework import status as http_status @@ -29,6 +30,8 @@ ExportDataRestore, ExportDataLocation, ExternalAccount, + BaseFileNode, + AbstractNode, ) from website.settings import WATERBUTLER_URL, INSTITUTIONAL_STORAGE_ADD_ON_METHOD, INSTITUTIONAL_STORAGE_BULK_MOUNT_METHOD from website.util import inspect_info # noqa @@ -653,6 +656,25 @@ def copy_file_from_location_to_destination(export_data, destination_node_id, des return copy_response_body +def prepare_file_node_for_add_on_storage(node_id, provider, file_path, **kwargs): + """ Add new file node record for add-on storage """ + if not is_add_on_storage(provider): + # Bulk-mount storage already created file node from other functions, do nothing here + return + + with transaction.atomic(): + node = AbstractNode.load(node_id) + if node.type == 'osf.node': + # Only get or create file nodes that belongs to projects + file_node = BaseFileNode.resolve_class(provider, BaseFileNode.FILE).get_or_create(node, file_path) + extras = {'cookie': kwargs.get('cookie')} + file_node.touch( + auth_header=None, + **extras, + ) + # signals.file_updated.send(target=node, user=user, event_type=NodeLog.FILE_COPIED, payload=payload) + + def move_file(node_id, provider, source_file_path, destination_file_path, cookies, callback_log=False, base_url=WATERBUTLER_URL, is_addon_storage=True, **kwargs): move_old_data_url = waterbutler_api_url_for( @@ -697,6 +719,13 @@ def move_addon_folder_to_backup( paths = path.split('/') paths.insert(1, f'backup_{process_start}') new_path = '/'.join(paths) + if provider == 'nextcloudinstitutions' and len(paths) > 2: + # Nextcloud for Institutions: try to create new parent folders before moving files + result = create_parent_folders_for_nextcloud_for_institutions(node_id, provider, paths, cookies=cookies, + callback_log=callback_log, base_url=base_url, **kwargs) + if result is not None: + return result + response = move_file(node_id, provider, path, new_path, cookies, callback_log, base_url, is_addon_storage=True, **kwargs) if response.status_code != 200 and response.status_code != 201 and response.status_code != 202: @@ -738,6 +767,13 @@ def move_addon_folder_from_backup(node_id, provider, process_start, cookies, cal else: continue new_path = '/'.join(paths) + if provider == 'nextcloudinstitutions' and len(paths) > 2: + # Nextcloud for Institutions: try to create new parent folders before moving files + result = create_parent_folders_for_nextcloud_for_institutions(node_id, provider, paths, cookies=cookies, + callback_log=callback_log, base_url=base_url, **kwargs) + if result is not None: + return result + response = move_file(node_id, provider, path, new_path, cookies, callback_log, base_url, is_addon_storage=True, **kwargs) if response.status_code != 200 and response.status_code != 201 and response.status_code != 202: @@ -801,7 +837,7 @@ def get_all_file_paths_in_addon_storage(node_id, provider, file_path, cookies, b return list_file_path, root_child_folders else: - return [file_path], [] + return [], [] except Exception: return [], [] @@ -1007,3 +1043,15 @@ def is_add_on_storage(provider): # Default value for unknown provider return None + + +def create_parent_folders_for_nextcloud_for_institutions(node_id, provider, paths, **kwargs): + """ Nextcloud for Institutions: create folders before moving files """ + parent_path = '/' + for path in paths[1:len(paths) - 1]: + folder_path = f'{path}/' + _, status_code = create_folder(node_id, provider, parent_path, folder_path, **kwargs) + if status_code not in [201, 409]: + return {'error': 'Cannot create folder for Nextcloud for Institutions'} + parent_path += folder_path + return None diff --git a/admin/rdm_custom_storage_location/export_data/views/export.py b/admin/rdm_custom_storage_location/export_data/views/export.py index 971a08460e8..85a550f974b 100644 --- a/admin/rdm_custom_storage_location/export_data/views/export.py +++ b/admin/rdm_custom_storage_location/export_data/views/export.py @@ -109,7 +109,7 @@ def export_data_process(task, cookies, export_data_id, **kwargs): try: # extract file information - export_data_json, file_info_json = export_data.extract_file_information_json_from_source_storage() + export_data_json, file_info_json = export_data.extract_file_information_json_from_source_storage(**kwargs) if task.is_aborted(): # check before each steps return None diff --git a/admin/rdm_custom_storage_location/export_data/views/management.py b/admin/rdm_custom_storage_location/export_data/views/management.py index d7bb530b88f..45c70a4a9ab 100644 --- a/admin/rdm_custom_storage_location/export_data/views/management.py +++ b/admin/rdm_custom_storage_location/export_data/views/management.py @@ -16,6 +16,7 @@ process_data_information, validate_exported_data, count_files_ng_ok, + is_add_on_storage, ) from osf.models import ExportData, Institution from website.util import inspect_info # noqa @@ -419,7 +420,7 @@ def get(self, request, data_id): return JsonResponse({'message': message}, status=400) # Get data from current source storage - _, storage_file_info = export_data.extract_file_information_json_from_source_storage() + _, storage_file_info = export_data.extract_file_information_json_from_source_storage(cookie=cookie) exported_file_versions = process_data_information(exported_file_info['files']) storage_file_versions = process_data_information(storage_file_info['files']) exclude_keys = [] @@ -467,10 +468,20 @@ def get(self, request, data_id): return JsonResponse({'message': message}, status=400) # Get data from current destination storage - _, storage_file_info = restore_data.extract_file_information_json_from_destination_storage() + _, storage_file_info = restore_data.extract_file_information_json_from_destination_storage(cookie=cookie) exported_file_versions = process_data_information(exported_file_info['files']) storage_file_versions = process_data_information(storage_file_info['files']) - exclude_keys = [] + exported_provider_name = export_data.source_waterbutler_settings.get('storage', {}).get('provider') + if is_add_on_storage(exported_provider_name): + exclude_keys = ['id', 'path', 'created_at', 'modified_at', 'timestamp_id', + # location/ + 'location', + # metadata/ + 'etag', 'extra', 'modified', 'provider', 'contentType', 'modified_utc', 'created_utc'] + else: + exclude_keys = ['host', 'bucket', 'folder', 'service', 'provider', 'verify_ssl', 'address', 'version', + # metadata/ + 'etag', 'extra'] data = count_files_ng_ok(exported_file_versions, storage_file_versions, exclude_keys=exclude_keys) # end check diff --git a/admin/rdm_custom_storage_location/export_data/views/restore.py b/admin/rdm_custom_storage_location/export_data/views/restore.py index 08786acbc42..e714bd3f85b 100644 --- a/admin/rdm_custom_storage_location/export_data/views/restore.py +++ b/admin/rdm_custom_storage_location/export_data/views/restore.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import +import hashlib import inspect # noqa import json import logging @@ -22,7 +23,7 @@ from admin.rdm_custom_storage_location import tasks from admin.rdm_custom_storage_location.export_data import utils from osf.models import ExportData, ExportDataRestore, BaseFileNode, Tag, RdmFileTimestamptokenVerifyResult, Institution, OSFUser, FileVersion, AbstractNode, \ - ProjectStorageType, UserQuota + ProjectStorageType, UserQuota, Guid from website.util import inspect_info # noqa from framework.transactions.handlers import no_auto_transaction from website.util.quota import update_user_used_quota @@ -120,7 +121,6 @@ def check_before_restore_export_data(cookies, export_id, destination_id, **kwarg export_data.status = pre_status export_data.save() return {'open_dialog': False, 'message': f'The export data files are corrupted'} - destination_first_project_id = export_data_folders[0].get('project', {}).get('id') # Check whether the restore destination storage is not empty destination_region = Region.objects.filter(id=destination_id).first() @@ -132,23 +132,25 @@ def check_before_restore_export_data(cookies, export_id, destination_id, **kwarg destination_provider = destination_region.provider_name if utils.is_add_on_storage(destination_provider): try: - destination_base_url = destination_region.waterbutler_url - response = utils.get_file_data(destination_first_project_id, destination_provider, '/', cookies, - destination_base_url, get_file_info=True, **kwargs) - if response.status_code != status.HTTP_200_OK: - # Error - logger.error(f'Return error with response: {response.content}') - export_data.status = pre_status - export_data.save() - return {'open_dialog': False, 'message': f'Cannot connect to destination storage'} - - response_body = response.json() - data = response_body.get('data') - if len(data) != 0: - # Destination storage is not empty, show confirm dialog - export_data.status = pre_status - export_data.save() - return {'open_dialog': True} + project_ids = {item.get('project', {}).get('id') for item in export_data_folders} + for project_id in project_ids: + destination_base_url = destination_region.waterbutler_url + response = utils.get_file_data(project_id, destination_provider, '/', cookies, + destination_base_url, get_file_info=True, **kwargs) + if response.status_code != status.HTTP_200_OK: + # Error + logger.error(f'Return error with response: {response.content}') + export_data.status = pre_status + export_data.save() + return {'open_dialog': False, 'message': f'Cannot connect to destination storage'} + + response_body = response.json() + data = response_body.get('data') + if len(data) != 0: + # Destination storage is not empty, show confirm dialog + export_data.status = pre_status + export_data.save() + return {'open_dialog': True} except Exception as e: logger.error(f'Exception: {e}') export_data.status = pre_status @@ -195,7 +197,6 @@ def restore_export_data_process(task, cookies, export_id, export_data_restore_id export_data_restore.update(process_end=timezone.make_naive(timezone.now(), timezone.utc), status=ExportData.STATUS_COMPLETED) return {'message': 'Restore data successfully.'} - # destination_first_project_id = export_data_files[0].get('project', {}).get('id') check_if_restore_process_stopped(task, current_process_step) current_process_step = 1 @@ -205,8 +206,8 @@ def restore_export_data_process(task, cookies, export_id, export_data_restore_id destination_provider = destination_region.provider_name if utils.is_add_on_storage(destination_provider): # Move all existing files/folders in destination to backup_{process_start} folder - for project_id in list_project_id: - # move_all_files_to_backup_folder(task, current_process_step, destination_first_project_id, export_data_restore, cookies, **kwargs) + project_ids = {item.get('project', {}).get('id') for item in export_data_folders} + for project_id in project_ids: move_all_files_to_backup_folder(task, current_process_step, project_id, export_data_restore, cookies, **kwargs) check_if_restore_process_stopped(task, current_process_step) @@ -321,6 +322,9 @@ def post(self, request, *args, **kwargs): export_data_restore.update(status=ExportData.STATUS_ERROR) return Response({'message': f'Cannot stop restore process at this time.'}, status=status.HTTP_400_BAD_REQUEST) + export_data_restore.update(process_end=timezone.make_naive(timezone.now(), timezone.utc), + status=ExportData.STATUS_STOPPED) + # Start rollback restore export data process process = tasks.run_restore_export_data_rollback_process.delay( cookies, @@ -518,31 +522,13 @@ def recalculate_user_quota(destination_region): update_user_used_quota(user, storage_type=UserQuota.CUSTOM_STORAGE) -def generate_new_file_path(file_materialized_path, version_id, is_file_not_latest_version): - new_file_materialized_path = file_materialized_path - if len(file_materialized_path) > 0 and is_file_not_latest_version: - # for past version files, rename and save each version as filename_{version} in '_version_files' folder - path_splits = new_file_materialized_path.split('/') - - # add _{version} to file name - file_name = path_splits[len(path_splits) - 1] - file_splits = file_name.split('.') - file_splits[0] = f'{file_splits[0]}_{version_id}' - versioned_file_name = '.'.join(file_splits) - - # add _version_files to folder path - path_splits.insert(len(path_splits) - 1, '_version_files') - path_splits[len(path_splits) - 1] = versioned_file_name - new_file_materialized_path = '/'.join(path_splits) - return new_file_materialized_path - - def move_all_files_to_backup_folder(task, current_process_step, destination_first_project_id, export_data_restore, cookies, **kwargs): try: destination_region = export_data_restore.destination - destination_provider = INSTITUTIONAL_STORAGE_PROVIDER_NAME + destination_provider = destination_region.provider_name destination_base_url = destination_region.waterbutler_url is_destination_addon_storage = utils.is_add_on_storage(destination_provider) + destination_provider = destination_provider if is_destination_addon_storage else INSTITUTIONAL_STORAGE_PROVIDER_NAME with transaction.atomic(): # Preload params to function check_task_aborted_function = partial( @@ -551,7 +537,7 @@ def move_all_files_to_backup_folder(task, current_process_step, destination_firs current_process_step=current_process_step) # move all old data in restore destination storage to a folder to back up folder - if is_destination_addon_storage: + if is_destination_addon_storage and destination_provider != 'onedrivebusiness': move_folder_to_backup = partial(utils.move_addon_folder_to_backup) else: move_folder_to_backup = partial(utils.move_bulk_mount_folder_to_backup) @@ -577,8 +563,10 @@ def move_all_files_to_backup_folder(task, current_process_step, destination_firs def create_folder_in_destination(task, current_process_step, export_data_folders, export_data_restore, cookies, **kwargs): destination_region = export_data_restore.destination - destination_provider = INSTITUTIONAL_STORAGE_PROVIDER_NAME + destination_provider = destination_region.provider_name destination_base_url = destination_region.waterbutler_url + is_destination_addon_storage = utils.is_add_on_storage(destination_provider) + destination_provider = destination_provider if is_destination_addon_storage else INSTITUTIONAL_STORAGE_PROVIDER_NAME list_updated_projects = [] for folder in export_data_folders: check_if_restore_process_stopped(task, current_process_step) @@ -600,9 +588,10 @@ def copy_files_from_export_data_to_destination(task, current_process_step, expor export_data = export_data_restore.export destination_region = export_data_restore.destination - destination_provider = INSTITUTIONAL_STORAGE_PROVIDER_NAME + destination_provider = destination_region.provider_name destination_base_url = destination_region.waterbutler_url is_destination_addon_storage = utils.is_add_on_storage(destination_provider) + destination_provider = destination_provider if is_destination_addon_storage else INSTITUTIONAL_STORAGE_PROVIDER_NAME list_created_file_nodes = [] list_file_restore_fail = [] @@ -617,9 +606,14 @@ def copy_files_from_export_data_to_destination(task, current_process_step, expor file_checkout_id = file.get('checkout_id') file_created = file.get('created_at') file_modified = file.get('modified_at') + file_provider = file.get('provider') - # Sort file by version id - file_versions.sort(key=lambda k: k.get('identifier', 0)) + if is_destination_addon_storage: + # Sort file by version modify date + file_versions.sort(key=lambda k: k.get('modified_at')) + else: + # Sort file by version id + file_versions.sort(key=lambda k: k.get('identifier', 0)) for index, version in enumerate(file_versions): try: @@ -627,7 +621,13 @@ def copy_files_from_export_data_to_destination(task, current_process_step, expor # Prepare file name and file path for uploading metadata = version.get('metadata', {}) - file_hash = metadata.get('sha256', metadata.get('md5')) + file_hash = metadata.get('sha256', metadata.get('md5', metadata.get('sha512', metadata.get('sha1')))) + if file_provider == 'onedrivebusiness': + # OneDrive Business: get hash file name based on quickXorHash and file version modified time + quick_xor_hash = metadata.get('quickXorHash') + file_version_modified = version.get('modified_at') + new_string_to_hash = f'{quick_xor_hash}{file_version_modified}' + file_hash = hashlib.sha256(new_string_to_hash.encode('utf-8')).hexdigest() version_id = version.get('identifier') if file_hash is None or version_id is None: # Cannot get path in export data storage, pass this file @@ -635,25 +635,17 @@ def copy_files_from_export_data_to_destination(task, current_process_step, expor file_hash_path = f'/{export_data.export_data_folder_name}/{ExportData.EXPORT_DATA_FILES_FOLDER}/{file_hash}' - # If the destination storage is add-on institutional storage: - # - for past version files, rename and save each version as filename_{version} in '_version_files' folder - # - the latest version is saved as the original - if is_destination_addon_storage: - is_file_not_latest_version = index < len(file_versions) - 1 - new_file_path = generate_new_file_path( - file_materialized_path=file_materialized_path, - version_id=version_id, - is_file_not_latest_version=is_file_not_latest_version) - else: - new_file_path = file_materialized_path - # Copy file from location to destination storage - response_body = utils.copy_file_from_location_to_destination(export_data, file_project_id, destination_provider, file_hash_path, new_file_path, - cookies, base_url=destination_base_url, **kwargs) + response_body = utils.copy_file_from_location_to_destination(export_data, file_project_id, destination_provider, file_hash_path, + file_materialized_path, cookies, base_url=destination_base_url, **kwargs) if response_body is None: list_file_restore_fail.append(file) continue + if is_destination_addon_storage: + # Create file node if not have for add-on storage + utils.prepare_file_node_for_add_on_storage(file_project_id, destination_provider, file_materialized_path, **kwargs) + response_id = response_body.get('data', {}).get('id') response_file_version_id = response_body.get('data', {}).get('attributes', {}).get('extra', {}).get('version', version_id) if response_id.startswith('osfstorage'): @@ -705,6 +697,28 @@ def copy_files_from_export_data_to_destination(task, current_process_step, expor BaseFileNode.objects.filter(id=node.id).update(created=file_created, modified=file_modified) + list_created_file_nodes.append({ + 'node': node, + 'file_tags': file_tags, + 'file_timestamp': file_timestamp, + 'project_id': file_project_id + }) + else: + # If id is provider_name/[path] then get path + file_path_splits = response_id.split('/') + if len(file_path_splits) >= 2: + file_path_splits[0] = '' + file_node_path = '/'.join(file_path_splits) + project_id = Guid.objects.filter(_id=file_project_id).values_list('id', flat=True).first() + if project_id is None: + continue + node_set = BaseFileNode.objects.filter( + type='osf.{}file'.format(destination_provider), + _path=file_node_path, + target_object_id=project_id, + deleted=None) + if node_set.exists(): + node = node_set.first() list_created_file_nodes.append({ 'node': node, 'file_tags': file_tags, @@ -740,7 +754,8 @@ def add_tag_and_timestamp_to_database(task, current_process_step, list_created_f def delete_all_files_except_backup_folder(export_data_restore, location_id, destination_first_project_id, cookies, **kwargs): destination_region = export_data_restore.destination destination_base_url = destination_region.waterbutler_url - destination_provider = INSTITUTIONAL_STORAGE_PROVIDER_NAME + destination_provider = destination_region.provider_name if utils.is_add_on_storage( + destination_region.provider_name) else INSTITUTIONAL_STORAGE_PROVIDER_NAME try: utils.delete_all_files_except_backup( @@ -753,12 +768,13 @@ def delete_all_files_except_backup_folder(export_data_restore, location_id, dest def move_all_files_from_backup_folder_to_root(export_data_restore, destination_first_project_id, cookies, **kwargs): destination_region = export_data_restore.destination - destination_provider = INSTITUTIONAL_STORAGE_PROVIDER_NAME + destination_provider = destination_region.provider_name destination_base_url = destination_region.waterbutler_url is_destination_addon_storage = utils.is_add_on_storage(destination_provider) + destination_provider = destination_provider if is_destination_addon_storage else INSTITUTIONAL_STORAGE_PROVIDER_NAME try: - if is_destination_addon_storage: + if is_destination_addon_storage and destination_provider != 'onedrivebusiness': move_folder_from_backup = partial(utils.move_addon_folder_from_backup) else: move_folder_from_backup = partial(utils.move_bulk_mount_folder_from_backup) diff --git a/admin/rdm_custom_storage_location/utils.py b/admin/rdm_custom_storage_location/utils.py index 6e964743480..e54e77442d6 100644 --- a/admin/rdm_custom_storage_location/utils.py +++ b/admin/rdm_custom_storage_location/utils.py @@ -14,6 +14,8 @@ import owncloud from django.core.exceptions import ValidationError +from addons.dropboxbusiness.models import node_post_save as dropboxbusiness_post_save +from addons.onedrivebusiness.models import node_post_save as onedrivebusiness_post_save from admin.rdm_addons.utils import get_rdm_addon_option from addons.googledrive.client import GoogleDriveClient from addons.osfstorage.models import Region @@ -37,13 +39,16 @@ from addons.base.institutions_utils import (KEYNAME_BASE_FOLDER, KEYNAME_USERMAP, KEYNAME_USERMAP_TMP, - sync_all) + sync_all, + node_post_save) from framework.exceptions import HTTPError +from osf.models import AbstractNode from website import settings as osf_settings from osf.models.external import ExternalAccountTemporary, ExternalAccount from osf.utils import external_util import datetime +from website.settings import INSTITUTIONAL_STORAGE_ADD_ON_METHOD from website.util import inspect_info # noqa logger = logging.getLogger(__name__) @@ -1134,3 +1139,23 @@ def save_usermap_from_tmp(provider_name, institution): rdm_addon_option.extended[KEYNAME_USERMAP] = new_usermap del rdm_addon_option.extended[KEYNAME_USERMAP_TMP] rdm_addon_option.save() + + +def add_node_settings_to_projects(institution, provider_name): + if provider_name not in INSTITUTIONAL_STORAGE_ADD_ON_METHOD: + # If storage is bulk-mount then do nothing + return + + # Get projects that does not have provider's node settings + institution_users = institution.osfuser_set.all() + kwargs = {f'addons_{provider_name}_node_settings': None} + projects = AbstractNode.objects.filter(type='osf.node', is_deleted=False, creator__in=institution_users, **kwargs) + + # Add node settings to above projects + for project in projects: + if provider_name == 'dropboxbusiness': + dropboxbusiness_post_save(None, project, True) + elif provider_name == 'onedrivebusiness': + onedrivebusiness_post_save(None, project, True) + else: + node_post_save(None, project, True) diff --git a/admin/rdm_custom_storage_location/views.py b/admin/rdm_custom_storage_location/views.py index 4799fdf3ff3..44253fe6f6f 100644 --- a/admin/rdm_custom_storage_location/views.py +++ b/admin/rdm_custom_storage_location/views.py @@ -234,8 +234,8 @@ def post(self, request): data.get('s3compatinstitutions_access_key'), data.get('s3compatinstitutions_secret_key'), data.get('s3compatinstitutions_bucket'), - bool(strtobool(data.get('s3compatinstitutions_server_side_encryption'))), provider_short_name, + bool(strtobool(data.get('s3compatinstitutions_server_side_encryption'))), ) elif provider_short_name == 'ociinstitutions': result = utils.save_ociinstitutions_credentials( @@ -325,6 +325,7 @@ def post(self, request): if status == http_status.HTTP_200_OK: utils.change_allowed_for_institutions( institution, provider_short_name) + utils.add_node_settings_to_projects(institution, provider_short_name) return JsonResponse(result[0], status=status) diff --git a/admin_tests/rdm_custom_storage_location/export_data/test_utils.py b/admin_tests/rdm_custom_storage_location/export_data/test_utils.py index a59f911eaf5..dd8a51397e1 100644 --- a/admin_tests/rdm_custom_storage_location/export_data/test_utils.py +++ b/admin_tests/rdm_custom_storage_location/export_data/test_utils.py @@ -17,15 +17,18 @@ from admin.rdm_custom_storage_location.export_data.views.restore import ProcessError from admin_tests.utilities import setup_view from framework.celery_tasks import app as celery_app -from osf.models import ExportData +from osf.models import ExportData, BaseFileNode from osf_tests.factories import ( AuthUserFactory, InstitutionFactory, ExportDataLocationFactory, ExportDataFactory, ExportDataRestoreFactory, + ProjectFactory, + DraftNodeFactory, ) from tests.base import AdminTestCase +from website.settings import INSTITUTIONAL_STORAGE_ADD_ON_METHOD FAKE_TASK_ID = '00000000-0000-0000-0000-000000000000' RESTORE_EXPORT_DATA_PATH = 'admin.rdm_custom_storage_location.export_data.views.restore' @@ -2303,6 +2306,47 @@ def test_copy_file_from_location_to_destination_failed_to_create_folder(self, mo mock_copy_file.assert_not_called() nt.assert_equal(response, None) + # prepare_file_node_for_add_on_storage + def test_prepare_file_node_for_add_on_storage(self): + file_path = '/folder/test_file.txt' + test_response_data = { + 'data': { + 'attributes': { + 'name': 'test_file.txt', + 'materialized': file_path, + 'modified': None + } + } + } + test_response = requests.Response() + test_response.status_code = status.HTTP_200_OK + test_response._content = json.dumps(test_response_data).encode('utf-8') + mock_get = MagicMock() + mock_get.return_value = test_response + project = ProjectFactory() + with patch('requests.get', mock_get): + for provider_name in INSTITUTIONAL_STORAGE_ADD_ON_METHOD: + # Test for each add-on storages + utils.prepare_file_node_for_add_on_storage(project._id, provider_name, file_path) + file_node_query_set = BaseFileNode.objects.filter(provider=provider_name, _path=file_path) + nt.assert_true(file_node_query_set.exists()) + file_node = file_node_query_set.first() + nt.assert_true(file_node.name) + nt.assert_true(file_node.materialized_path) + nt.assert_true(file_node.last_touched) + + def test_prepare_file_node_for_add_on_storage_bulk_mount_storage(self): + project = ProjectFactory() + utils.prepare_file_node_for_add_on_storage(project._id, 'osfstorage', '/text_file.txt') + file_node_query_set = BaseFileNode.objects.filter(provider='osfstorage', _path='/text_file.txt') + nt.assert_false(file_node_query_set.exists()) + + def test_prepare_file_node_for_add_on_storage_draft_node(self): + draft_node = DraftNodeFactory() + utils.prepare_file_node_for_add_on_storage(draft_node._id, 's3compatinstitutions', '/text_file.txt') + file_node_query_set = BaseFileNode.objects.filter(provider='s3compatinstitutions', _path='/text_file.txt') + nt.assert_false(file_node_query_set.exists()) + # move_file def test_move_file_in_addon_storage(self): test_response = requests.Response() @@ -2360,6 +2404,46 @@ def test_move_addon_folder_to_backup_success(self, mock_get_all_paths, mock_move mock_delete_paths.assert_called() nt.assert_equal(result, {}) + @patch(f'{EXPORT_DATA_UTIL_PATH}.delete_paths') + @patch(f'{EXPORT_DATA_UTIL_PATH}.move_file') + @patch(f'{EXPORT_DATA_UTIL_PATH}.get_all_file_paths_in_addon_storage') + @patch(f'{EXPORT_DATA_UTIL_PATH}.create_folder') + def test_move_addon_folder_to_backup_nextcloudinstitutions(self, mock_create_folder, mock_get_all_paths, mock_move_file, mock_delete_paths): + test_response = requests.Response() + test_response.status_code = status.HTTP_200_OK + + mock_get_all_paths.return_value = (['/a/b/c/folder/test1.txt'], []) + mock_move_file.return_value = test_response + mock_delete_paths.return_value = None + mock_create_folder.return_value = (None, 201) + + result = utils.move_addon_folder_to_backup(TEST_PROJECT_ID, 'nextcloudinstitutions', + self.export_data_restore.process_start_timestamp, + None) + mock_get_all_paths.assert_called() + mock_move_file.assert_called() + mock_delete_paths.assert_called() + nt.assert_equal(result, {}) + + @patch(f'{EXPORT_DATA_UTIL_PATH}.delete_paths') + @patch(f'{EXPORT_DATA_UTIL_PATH}.move_file') + @patch(f'{EXPORT_DATA_UTIL_PATH}.get_all_file_paths_in_addon_storage') + @patch(f'{EXPORT_DATA_UTIL_PATH}.create_folder') + def test_move_addon_folder_to_backup_nextcloudinstitutions_create_folder_fail(self, mock_create_folder, mock_get_all_paths, mock_move_file, mock_delete_paths): + test_response = requests.Response() + test_response.status_code = status.HTTP_200_OK + + mock_get_all_paths.return_value = (['/a/b/c/folder/test1.txt'], []) + mock_move_file.return_value = test_response + mock_delete_paths.return_value = None + mock_create_folder.return_value = (None, 400) + + result = utils.move_addon_folder_to_backup(TEST_PROJECT_ID, 'nextcloudinstitutions', + self.export_data_restore.process_start_timestamp, + None) + + nt.assert_equal(result, {'error': 'Cannot create folder for Nextcloud for Institutions'}) + @patch(f'{EXPORT_DATA_UTIL_PATH}.delete_paths') @patch(f'{EXPORT_DATA_UTIL_PATH}.move_file') @patch(f'{EXPORT_DATA_UTIL_PATH}.get_all_file_paths_in_addon_storage') @@ -2526,7 +2610,7 @@ def test_get_all_file_paths_in_addon_storage_empty_path(self): TEST_PROVIDER, '/empty_path/', None) mock_get_file_data.assert_called() - nt.assert_equal(list_file_path, ['/empty_path/']) + nt.assert_equal(list_file_path, []) nt.assert_equal(root_child_folders, []) def test_get_all_file_paths_in_addon_storage_invalid_regex(self): @@ -2885,6 +2969,39 @@ def test_move_addon_folder_from_backup(self, mock_get_all_file_paths, mock_move_ mock_delete_paths.assert_called() nt.assert_equal(result, {}) + @patch(f'{EXPORT_DATA_UTIL_PATH}.delete_paths') + @patch(f'{EXPORT_DATA_UTIL_PATH}.move_file') + @patch(f'{EXPORT_DATA_UTIL_PATH}.get_all_file_paths_in_addon_storage') + @patch(f'{EXPORT_DATA_UTIL_PATH}.create_folder') + def test_move_addon_folder_from_backup_nextcloudinstitutions(self, mock_create_folder, mock_get_all_file_paths, mock_move_file, mock_delete_paths): + test_response = requests.Response() + test_response.status_code = status.HTTP_200_OK + + mock_get_all_file_paths.return_value = (['/backup_2022101010/b/folder/file1.txt'], []) + mock_move_file.return_value = test_response + mock_create_folder.return_value = (None, 201) + result = utils.move_addon_folder_from_backup(TEST_PROJECT_ID, 'nextcloudinstitutions', '2022101010', None) + + mock_get_all_file_paths.assert_called_once() + mock_move_file.assert_called_once() + mock_delete_paths.assert_called() + nt.assert_equal(result, {}) + + @patch(f'{EXPORT_DATA_UTIL_PATH}.delete_paths') + @patch(f'{EXPORT_DATA_UTIL_PATH}.move_file') + @patch(f'{EXPORT_DATA_UTIL_PATH}.get_all_file_paths_in_addon_storage') + @patch(f'{EXPORT_DATA_UTIL_PATH}.create_folder') + def test_move_addon_folder_from_backup_nextcloudinstitutions_create_folder_fail(self, mock_create_folder, mock_get_all_file_paths, mock_move_file, mock_delete_paths): + test_response = requests.Response() + test_response.status_code = status.HTTP_200_OK + + mock_get_all_file_paths.return_value = (['/backup_2022101010/b/folder/file1.txt'], []) + mock_move_file.return_value = test_response + mock_create_folder.return_value = (None, 400) + result = utils.move_addon_folder_from_backup(TEST_PROJECT_ID, 'nextcloudinstitutions', '2022101010', None) + + nt.assert_equal(result, {'error': 'Cannot create folder for Nextcloud for Institutions'}) + @patch(f'{EXPORT_DATA_UTIL_PATH}.delete_paths') @patch(f'{EXPORT_DATA_UTIL_PATH}.move_file') @patch(f'{EXPORT_DATA_UTIL_PATH}.get_all_file_paths_in_addon_storage') @@ -3185,22 +3302,20 @@ def test_is_add_on_storage(self): nt.assert_is_none(utils.is_add_on_storage(None)) nt.assert_is_none(utils.is_add_on_storage('osf_storage')) - # both addon method and bulk-mount method - nt.assert_false(utils.is_add_on_storage('owncloud')) - nt.assert_false(utils.is_add_on_storage('s3compat')) - nt.assert_false(utils.is_add_on_storage('s3')) - # only addon method providers nt.assert_true(utils.is_add_on_storage('nextcloudinstitutions')) nt.assert_true(utils.is_add_on_storage('s3compatinstitutions')) nt.assert_true(utils.is_add_on_storage('ociinstitutions')) nt.assert_true(utils.is_add_on_storage('dropboxbusiness')) + nt.assert_true(utils.is_add_on_storage('onedrivebusiness')) # only bulk-mount method providers - nt.assert_false(utils.is_add_on_storage('onedrivebusiness')) nt.assert_false(utils.is_add_on_storage('swift')) nt.assert_false(utils.is_add_on_storage('box')) nt.assert_false(utils.is_add_on_storage('nextcloud')) nt.assert_false(utils.is_add_on_storage('osfstorage')) nt.assert_false(utils.is_add_on_storage('onedrive')) nt.assert_false(utils.is_add_on_storage('googledrive')) + nt.assert_false(utils.is_add_on_storage('owncloud')) + nt.assert_false(utils.is_add_on_storage('s3compat')) + nt.assert_false(utils.is_add_on_storage('s3')) diff --git a/admin_tests/rdm_custom_storage_location/export_data/views/test_management.py b/admin_tests/rdm_custom_storage_location/export_data/views/test_management.py index 689c87ca9b2..3b75101340a 100644 --- a/admin_tests/rdm_custom_storage_location/export_data/views/test_management.py +++ b/admin_tests/rdm_custom_storage_location/export_data/views/test_management.py @@ -723,7 +723,7 @@ def test_check_export_data_successful(self, mock_class): request.user = self.user request.COOKIES = '213919sdasdn823193929' - def side_effect(): + def side_effect(cookie): return '', FAKE_DATA_NEW mock_class.side_effect = side_effect @@ -849,7 +849,7 @@ def test_check_restore_data_successful(self, mock_class_export, mock_class_resto request.COOKIES = '213919sdasdn823193929' request.GET = {'destination_id': 100} - def side_effect_export_data(): + def side_effect_export_data(cookie): return '', FAKE_DATA_NEW def side_effect_export_data_restore(destination_id=100): diff --git a/admin_tests/rdm_custom_storage_location/export_data/views/test_restore.py b/admin_tests/rdm_custom_storage_location/export_data/views/test_restore.py index df84ad4bc17..ea4bc508dd8 100644 --- a/admin_tests/rdm_custom_storage_location/export_data/views/test_restore.py +++ b/admin_tests/rdm_custom_storage_location/export_data/views/test_restore.py @@ -22,6 +22,7 @@ RegionFactory, OsfStorageFileFactory, ExportDataRestoreFactory, + BaseFileNodeFactory, addon_waterbutler_settings, bulkmount_waterbutler_settings, UserFactory, @@ -785,17 +786,6 @@ def test_recalculate_user_quota(self, mock_update_user_used_quota): self.view.recalculate_user_quota(self.export_data_restore.destination) mock_update_user_used_quota.assert_called() - # generate_new_file_path - def test_generate_new_file_path_not_latest_version(self): - path = '/mock_test.txt' - new_path = self.view.generate_new_file_path(path, 2, True) - nt.assert_equal(new_path, '/_version_files/mock_test_2.txt') - - def test_generate_new_file_path_latest_version(self): - path = '/mock_test.txt' - new_path = self.view.generate_new_file_path(path, 3, False) - nt.assert_equal(new_path, path) - # move_all_files_to_backup_folder def test_move_all_files_to_backup_folder_addon_storage(self): task = AbortableTask() @@ -904,9 +894,9 @@ def test_create_folder_in_destination(self, mock_check_progress, mock_create_fol # copy_files_from_export_data_to_destination @mock.patch(f'{EXPORT_DATA_UTIL_PATH}.copy_file_from_location_to_destination') - @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.generate_new_file_path') @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.check_if_restore_process_stopped') - def test_copy_files_from_export_data_to_destination_addon_storage(self, mock_check_progress, mock_generate_new_file_path, mock_copy): + @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.BaseFileNode.objects') + def test_copy_files_from_export_data_to_destination_addon_storage(self, mock_basefilenode, mock_check_progress, mock_copy): addon_export_file = self.test_export_data_files addon_export_file[0]['path'] = '/@ember-decorators/utils/collapse-proto.d.ts' addon_export_file[0]['provider'] = 'nextcloudinstitutions' @@ -918,19 +908,27 @@ def test_copy_files_from_export_data_to_destination_addon_storage(self, mock_che mock_is_add_on = mock.MagicMock() mock_is_add_on.return_value = True mock_check_progress.return_value = None - mock_generate_new_file_path.return_value = '/@ember-decorators/utils/collapse-proto.d.ts' mock_copy.return_value = { 'data': { 'id': 'nextcloudinstitutions/fake_id' } } + basefilenode = BaseFileNodeFactory.create(id=11, _id='11', type='osf.nextcloudinstitutionsfile', + provider='nextcloudinstitutions', + _path='/61215649851ebb71d8f1ae01f4c99', + path='/61215649851ebb71d8f1ae01f4c99', + _materialized_path='/test.txt', + target_content_type_id=59) + mock_file = mock.MagicMock() + mock_basefilenode.filter.return_value = mock_file + mock_file.exists.return_value = True + mock_file.first.return_value = basefilenode with mock.patch(f'{EXPORT_DATA_UTIL_PATH}.is_add_on_storage', mock_is_add_on): result = self.view.copy_files_from_export_data_to_destination(task, 1, addon_export_file, self.addon_data_restore, None) mock_is_add_on.assert_called() mock_check_progress.assert_called() - mock_generate_new_file_path.assert_called() mock_copy.assert_called() nt.assert_equal(result[0], []) @@ -938,10 +936,9 @@ def test_copy_files_from_export_data_to_destination_addon_storage(self, mock_che @mock.patch('osf.models.BaseFileNode.objects') @mock.patch('osf.models.FileVersion.objects') @mock.patch(f'{EXPORT_DATA_UTIL_PATH}.copy_file_from_location_to_destination') - @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.generate_new_file_path') @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.check_if_restore_process_stopped') def test_copy_files_from_export_data_to_destination_osfstorage(self, mock_check_progress, - mock_generate_new_file_path, mock_copy, + mock_copy, mock_file_version, mock_file_node, mock_base_file_node): def create_node(*args, **kwargs): @@ -967,7 +964,6 @@ def create_node(*args, **kwargs): mock_is_add_on = mock.MagicMock() mock_is_add_on.return_value = False mock_check_progress.return_value = None - mock_generate_new_file_path.return_value = '/@ember-decorators/utils/collapse-proto.d.ts' mock_copy.side_effect = create_node with mock.patch(f'{EXPORT_DATA_UTIL_PATH}.is_add_on_storage', mock_is_add_on): @@ -976,7 +972,6 @@ def create_node(*args, **kwargs): None) mock_is_add_on.assert_called() mock_check_progress.assert_called() - mock_generate_new_file_path.assert_not_called() mock_copy.assert_called() nt.assert_equal(len(result), 2) nt.assert_equal(result[0][0].get('file_tags'), ['hello', 'world']) @@ -984,10 +979,9 @@ def create_node(*args, **kwargs): nt.assert_equal(result[0][0].get('project_id'), 'pmockt') @mock.patch(f'{EXPORT_DATA_UTIL_PATH}.copy_file_from_location_to_destination') - @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.generate_new_file_path') @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.check_if_restore_process_stopped') def test_copy_files_from_export_data_to_destination_osfstorage_not_add_new_version(self, mock_check_progress, - mock_generate_new_file_path, mock_copy): + mock_copy): def create_node(*args, **kwargs): file = OsfStorageFileFactory.create(_id='fake_id') user = AuthUserFactory.create(username='fake_user') @@ -1015,7 +1009,6 @@ def create_node(*args, **kwargs): mock_is_add_on = mock.MagicMock() mock_is_add_on.return_value = False mock_check_progress.return_value = None - mock_generate_new_file_path.return_value = '/@ember-decorators/utils/collapse-proto.d.ts' mock_copy.side_effect = create_node with mock.patch(f'{EXPORT_DATA_UTIL_PATH}.is_add_on_storage', mock_is_add_on): @@ -1024,7 +1017,6 @@ def create_node(*args, **kwargs): None) mock_is_add_on.assert_called() mock_check_progress.assert_called() - mock_generate_new_file_path.assert_not_called() mock_copy.assert_called() nt.assert_equal(len(result), 2) nt.assert_equal(len(result[0][0].get('node').versions.all()), 1) @@ -1033,9 +1025,8 @@ def create_node(*args, **kwargs): nt.assert_equal(result[0][0].get('project_id'), 'pmockt') @mock.patch(f'{EXPORT_DATA_UTIL_PATH}.copy_file_from_location_to_destination') - @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.generate_new_file_path') @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.check_if_restore_process_stopped') - def test_copy_files_from_export_data_to_destination_other_bulk_mount_storage(self, mock_check_progress, mock_generate_new_file_path, mock_copy): + def test_copy_files_from_export_data_to_destination_other_bulk_mount_storage(self, mock_check_progress, mock_copy): bulkmount_export_files = self.test_export_data_files bulkmount_export_files[0]['provider'] = 'box' other_bulk_mount_data_restore = self.bulk_mount_data_restore @@ -1052,7 +1043,6 @@ def test_copy_files_from_export_data_to_destination_other_bulk_mount_storage(sel mock_is_add_on = mock.MagicMock() mock_is_add_on.return_value = False mock_check_progress.return_value = None - mock_generate_new_file_path.return_value = '/@ember-decorators/utils/collapse-proto.d.ts' mock_copy.return_value = { 'data': { 'id': 'box/fake_id' @@ -1065,14 +1055,12 @@ def test_copy_files_from_export_data_to_destination_other_bulk_mount_storage(sel None) mock_is_add_on.assert_called() mock_check_progress.assert_called() - mock_generate_new_file_path.assert_not_called() mock_copy.assert_called() nt.assert_equal(result[0], []) @mock.patch(f'{EXPORT_DATA_UTIL_PATH}.copy_file_from_location_to_destination') - @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.generate_new_file_path') @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.check_if_restore_process_stopped') - def test_copy_files_from_export_data_to_destination_empty_file_info_list(self, mock_check_progress, mock_generate_new_file_path, mock_copy): + def test_copy_files_from_export_data_to_destination_empty_file_info_list(self, mock_check_progress, mock_copy): task = AbortableTask() task.request_stack = LocalStack() task.request.id = FAKE_TASK_ID @@ -1080,7 +1068,6 @@ def test_copy_files_from_export_data_to_destination_empty_file_info_list(self, m mock_is_add_on = mock.MagicMock() mock_is_add_on.return_value = False mock_check_progress.return_value = None - mock_generate_new_file_path.return_value = '/@ember-decorators/utils/collapse-proto.d.ts' mock_copy.return_value = { 'data': { 'id': 'osfstorage/fake_id' @@ -1092,14 +1079,12 @@ def test_copy_files_from_export_data_to_destination_empty_file_info_list(self, m self.export_data_restore, None) mock_is_add_on.assert_called() mock_check_progress.assert_not_called() - mock_generate_new_file_path.assert_not_called() mock_copy.assert_not_called() nt.assert_equal(result[0], []) @mock.patch(f'{EXPORT_DATA_UTIL_PATH}.copy_file_from_location_to_destination') - @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.generate_new_file_path') @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.check_if_restore_process_stopped') - def test_copy_files_from_export_data_to_destination_empty_version(self, mock_check_progress, mock_generate_new_file_path, mock_copy): + def test_copy_files_from_export_data_to_destination_empty_version(self, mock_check_progress, mock_copy): bulkmount_export_files = self.test_export_data_files bulkmount_export_files[0]['version'] = [] @@ -1110,7 +1095,6 @@ def test_copy_files_from_export_data_to_destination_empty_version(self, mock_che mock_is_add_on = mock.MagicMock() mock_is_add_on.return_value = False mock_check_progress.return_value = None - mock_generate_new_file_path.return_value = '/@ember-decorators/utils/collapse-proto.d.ts' mock_copy.return_value = { 'data': { 'id': 'osfstorage/fake_id' @@ -1122,14 +1106,12 @@ def test_copy_files_from_export_data_to_destination_empty_version(self, mock_che self.export_data_restore, None) mock_is_add_on.assert_called() mock_check_progress.assert_called() - mock_generate_new_file_path.assert_not_called() mock_copy.assert_not_called() nt.assert_equal(result[0], []) @mock.patch(f'{EXPORT_DATA_UTIL_PATH}.copy_file_from_location_to_destination') - @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.generate_new_file_path') @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.check_if_restore_process_stopped') - def test_copy_files_from_export_data_to_destination_no_file_hash(self, mock_check_progress, mock_generate_new_file_path, mock_copy): + def test_copy_files_from_export_data_to_destination_no_file_hash(self, mock_check_progress, mock_copy): bulkmount_export_files = self.test_export_data_files bulkmount_export_files[0]['version'] = [{ 'identifier': '', @@ -1146,7 +1128,6 @@ def test_copy_files_from_export_data_to_destination_no_file_hash(self, mock_chec mock_is_add_on = mock.MagicMock() mock_is_add_on.return_value = False mock_check_progress.return_value = None - mock_generate_new_file_path.return_value = '/@ember-decorators/utils/collapse-proto.d.ts' mock_copy.return_value = { 'data': { 'id': 'osfstorage/fake_id' @@ -1158,14 +1139,12 @@ def test_copy_files_from_export_data_to_destination_no_file_hash(self, mock_chec self.export_data_restore, None) mock_is_add_on.assert_called() mock_check_progress.assert_called() - mock_generate_new_file_path.assert_not_called() mock_copy.assert_not_called() nt.assert_equal(result[0], []) @mock.patch(f'{EXPORT_DATA_UTIL_PATH}.copy_file_from_location_to_destination') - @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.generate_new_file_path') @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.check_if_restore_process_stopped') - def test_copy_files_from_export_data_to_destination_copy_error(self, mock_check_progress, mock_generate_new_file_path, mock_copy): + def test_copy_files_from_export_data_to_destination_copy_error(self, mock_check_progress, mock_copy): bulkmount_export_files = self.test_export_data_files task = AbortableTask() @@ -1175,7 +1154,6 @@ def test_copy_files_from_export_data_to_destination_copy_error(self, mock_check_ mock_is_add_on = mock.MagicMock() mock_is_add_on.return_value = False mock_check_progress.return_value = None - mock_generate_new_file_path.return_value = '/@ember-decorators/utils/collapse-proto.d.ts' mock_copy.return_value = None with mock.patch(f'{EXPORT_DATA_UTIL_PATH}.is_add_on_storage', mock_is_add_on): @@ -1183,14 +1161,12 @@ def test_copy_files_from_export_data_to_destination_copy_error(self, mock_check_ self.addon_data_restore, None) mock_is_add_on.assert_called() mock_check_progress.assert_called() - mock_generate_new_file_path.assert_not_called() mock_copy.assert_called() nt.assert_equal(result[0], []) @mock.patch(f'{EXPORT_DATA_UTIL_PATH}.copy_file_from_location_to_destination') - @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.generate_new_file_path') @mock.patch(f'{RESTORE_EXPORT_DATA_PATH}.check_if_restore_process_stopped') - def test_copy_files_from_export_data_to_destination_exception(self, mock_check_progress, mock_generate_new_file_path, mock_copy): + def test_copy_files_from_export_data_to_destination_exception(self, mock_check_progress, mock_copy): bulkmount_export_files = self.test_export_data_files test_response = requests.Response() @@ -1204,7 +1180,6 @@ def test_copy_files_from_export_data_to_destination_exception(self, mock_check_p mock_is_add_on = mock.MagicMock() mock_is_add_on.return_value = False mock_check_progress.return_value = None - mock_generate_new_file_path.return_value = '/@ember-decorators/utils/collapse-proto.d.ts' mock_copy.side_effect = Exception('Mock test exception while downloading file from export data') with mock.patch(f'{EXPORT_DATA_UTIL_PATH}.is_add_on_storage', mock_is_add_on): @@ -1212,7 +1187,6 @@ def test_copy_files_from_export_data_to_destination_exception(self, mock_check_p self.addon_data_restore, None) mock_is_add_on.assert_called() mock_check_progress.assert_called() - mock_generate_new_file_path.assert_not_called() mock_copy.assert_called() nt.assert_equal(result[0], []) diff --git a/admin_tests/rdm_custom_storage_location/test_utils.py b/admin_tests/rdm_custom_storage_location/test_utils.py index 7c620983147..751afc2af64 100644 --- a/admin_tests/rdm_custom_storage_location/test_utils.py +++ b/admin_tests/rdm_custom_storage_location/test_utils.py @@ -1,10 +1,13 @@ import pytest +from mock import patch, MagicMock from nose import tools as nt -from admin.rdm_custom_storage_location.utils import get_providers +from admin.rdm_custom_storage_location.utils import get_providers, add_node_settings_to_projects +from osf_tests.factories import InstitutionFactory, ProjectFactory, RegionFactory, bulkmount_waterbutler_settings, addon_waterbutler_settings, AuthUserFactory @pytest.mark.feature_202210 +@pytest.mark.django_db class TestUtils: def test_get_providers(self): provider_list = get_providers() @@ -31,3 +34,79 @@ def test_get_providers(self): provider_list = get_providers(available_list=available_list) provider_list_short_name = [p.short_name for p in provider_list] nt.assert_list_equal(provider_list_short_name, available_list) + + def test_add_node_settings_to_projects_bulk_mount_storage(self): + user = AuthUserFactory() + project = ProjectFactory(creator=user) + region = RegionFactory(waterbutler_settings=bulkmount_waterbutler_settings) + institution = InstitutionFactory.create(_id=region.guid) + institution.nodes.set([project]) + user.affiliated_institutions.add(institution) + + mock_dropboxbusiness_post_save = MagicMock() + mock_onedrivebusiness_post_save = MagicMock() + mock_node_post_save = MagicMock() + with patch('admin.rdm_custom_storage_location.utils.dropboxbusiness_post_save', mock_dropboxbusiness_post_save): + with patch('admin.rdm_custom_storage_location.utils.onedrivebusiness_post_save', mock_onedrivebusiness_post_save): + with patch('admin.rdm_custom_storage_location.utils.node_post_save', mock_node_post_save): + add_node_settings_to_projects(institution, 'osfstorage') + mock_dropboxbusiness_post_save.assert_not_called() + mock_onedrivebusiness_post_save.assert_not_called() + mock_node_post_save.assert_not_called() + + def test_add_node_settings_to_projects_dropboxbusiness(self): + user = AuthUserFactory() + project = ProjectFactory(creator=user) + region = RegionFactory(waterbutler_settings=addon_waterbutler_settings) + institution = InstitutionFactory.create(_id=region.guid) + institution.nodes.set([project]) + user.affiliated_institutions.add(institution) + + mock_dropboxbusiness_post_save = MagicMock() + mock_onedrivebusiness_post_save = MagicMock() + mock_node_post_save = MagicMock() + with patch('admin.rdm_custom_storage_location.utils.dropboxbusiness_post_save', mock_dropboxbusiness_post_save): + with patch('admin.rdm_custom_storage_location.utils.onedrivebusiness_post_save', mock_onedrivebusiness_post_save): + with patch('admin.rdm_custom_storage_location.utils.node_post_save', mock_node_post_save): + add_node_settings_to_projects(institution, 'dropboxbusiness') + mock_dropboxbusiness_post_save.assert_called() + mock_onedrivebusiness_post_save.assert_not_called() + mock_node_post_save.assert_not_called() + + def test_add_node_settings_to_projects_onedrivebusiness(self): + user = AuthUserFactory() + project = ProjectFactory(creator=user) + region = RegionFactory(waterbutler_settings=addon_waterbutler_settings) + institution = InstitutionFactory.create(_id=region.guid) + institution.nodes.set([project]) + user.affiliated_institutions.add(institution) + + mock_dropboxbusiness_post_save = MagicMock() + mock_onedrivebusiness_post_save = MagicMock() + mock_node_post_save = MagicMock() + with patch('admin.rdm_custom_storage_location.utils.dropboxbusiness_post_save', mock_dropboxbusiness_post_save): + with patch('admin.rdm_custom_storage_location.utils.onedrivebusiness_post_save', mock_onedrivebusiness_post_save): + with patch('admin.rdm_custom_storage_location.utils.node_post_save', mock_node_post_save): + add_node_settings_to_projects(institution, 'onedrivebusiness') + mock_dropboxbusiness_post_save.assert_not_called() + mock_onedrivebusiness_post_save.assert_called() + mock_node_post_save.assert_not_called() + + def test_add_node_settings_to_projects_other_add_on_storage(self): + user = AuthUserFactory() + project = ProjectFactory(creator=user) + region = RegionFactory(waterbutler_settings=addon_waterbutler_settings) + institution = InstitutionFactory.create(_id=region.guid) + institution.nodes.set([project]) + user.affiliated_institutions.add(institution) + + mock_dropboxbusiness_post_save = MagicMock() + mock_onedrivebusiness_post_save = MagicMock() + mock_node_post_save = MagicMock() + with patch('admin.rdm_custom_storage_location.utils.dropboxbusiness_post_save', mock_dropboxbusiness_post_save): + with patch('admin.rdm_custom_storage_location.utils.onedrivebusiness_post_save', mock_onedrivebusiness_post_save): + with patch('admin.rdm_custom_storage_location.utils.node_post_save', mock_node_post_save): + add_node_settings_to_projects(institution, 'nextcloudinstitutions') + mock_dropboxbusiness_post_save.assert_not_called() + mock_onedrivebusiness_post_save.assert_not_called() + mock_node_post_save.assert_called() diff --git a/osf/models/export_data.py b/osf/models/export_data.py index eefe7dc441d..30e39696e7b 100644 --- a/osf/models/export_data.py +++ b/osf/models/export_data.py @@ -2,6 +2,7 @@ import logging import os.path +import hashlib import requests from django.db import models @@ -22,6 +23,7 @@ from admin.base import settings as admin_settings from osf.utils.datetime_aware_jsonfield import DateTimeAwareJSONField from osf.utils.fields import EncryptedJSONField +from website.settings import INSTITUTIONAL_STORAGE_BULK_MOUNT_METHOD logger = logging.getLogger(__name__) @@ -29,6 +31,7 @@ 'DateTruncMixin', 'SecondDateTimeField', 'ExportData', + 'get_hashes_from_metadata' ] @@ -48,6 +51,26 @@ def truncate_date(self, dt): return dt.replace(microsecond=0) +def get_hashes_from_metadata(provider_name, extra, hash_name): + """ Get hash value from extra value in metadata""" + value = extra.get(hash_name) + extra_hashes = extra.get('hashes', {}) + if not value: + # Try to get hash value by hash name in extra + value = extra_hashes.get(hash_name) + + if not value: + extra_provider_value = extra_hashes.get(provider_name) + if hash_name == 'sha256' and provider_name == 'dropboxbusiness': + # Dropbox Business: get sha256 from extra + value = extra_provider_value + elif type(extra_provider_value) is dict: + # Other: try to get hash value by hash name in extra[] + value = extra_provider_value.get(hash_name) + + return value + + class ExportData(base.BaseModel): STATUS_RUNNING = 'Running' STATUS_STOPPING = 'Stopping' @@ -93,7 +116,7 @@ def __repr__(self): __str__ = __repr__ - def extract_file_information_json_from_source_storage(self): + def extract_file_information_json_from_source_storage(self, **kwargs): # Get region guid == institution guid source_storage_guid = self.source.guid # Get Institution by guid @@ -126,20 +149,6 @@ def extract_file_information_json_from_source_storage(self): 'institution': institution_json, } - # If source is NII storage, also get default storage - if self.source.provider_name == 'osfstorage' and self.source.id != 1: - # get list FileVersion linked to source storage, default storage - # but the creator must be affiliated with current institution - file_versions = FileVersion.objects.filter(region_id__in=[1, self.source.id], creator__affiliated_institutions___id=source_storage_guid) - else: - # get list FileVersion linked to source storage - file_versions = self.source.fileversion_set.all() - # but the creator must be affiliated with current institution - file_versions = file_versions.filter(creator__affiliated_institutions___id=source_storage_guid) - - # get base_file_nodes__ids by file_versions__ids above via the BaseFileVersionsThrough model - base_file_versions_set = BaseFileVersionsThrough.objects.filter(fileversion__in=file_versions) - base_file_nodes__ids = base_file_versions_set.values_list('basefilenode_id', flat=True).distinct('basefilenode_id') # get project list, includes public/private/deleted projects projects = institution.nodes.filter(type='osf.node', is_deleted=False) institution_users = institution.osfuser_set.all() @@ -150,7 +159,6 @@ def extract_file_information_json_from_source_storage(self): # If source is not NII storage, only get projects that belongs to that source institutional storage if self.source.provider_name != 'osfstorage' and self.source.id != 1: projects__ids = projects.filter(addons_osfstorage_node_settings__region=self.source).values_list('id', flat=True) - source_project_ids = set() # get folder nodes base_folder_nodes = BaseFileNode.objects.filter( @@ -167,7 +175,6 @@ def extract_file_information_json_from_source_storage(self): } # project project = folder.target - source_project_ids.add(project.id) project_info = { 'id': project._id, 'name': project.title, @@ -175,22 +182,48 @@ def extract_file_information_json_from_source_storage(self): folder_info['project'] = project_info folders.append(folder_info) - # get base_file_nodes - base_file_nodes = BaseFileNode.objects.filter( - id__in=base_file_nodes__ids, - target_object_id__in=projects__ids, - deleted=None) + if self.source.provider_name in INSTITUTIONAL_STORAGE_BULK_MOUNT_METHOD: + # Bulk-mount storage + # If source is NII storage, also get default storage + if self.source.provider_name == 'osfstorage' and self.source.id != 1: + # get list FileVersion linked to source storage, default storage + # but the creator must be affiliated with current institution + file_versions = FileVersion.objects.filter(region_id__in=[1, self.source.id], creator__affiliated_institutions___id=source_storage_guid) + else: + # get list FileVersion linked to source storage + file_versions = self.source.fileversion_set.all() + # but the creator must be affiliated with current institution + file_versions = file_versions.filter(creator__affiliated_institutions___id=source_storage_guid) + + # get base_file_nodes__ids by file_versions__ids above via the BaseFileVersionsThrough model + base_file_versions_set = BaseFileVersionsThrough.objects.filter(fileversion__in=file_versions) + base_file_nodes__ids = base_file_versions_set.values_list('basefilenode_id', flat=True).distinct('basefilenode_id') + + # get base_file_nodes + base_file_nodes = BaseFileNode.objects.filter( + id__in=base_file_nodes__ids, + target_object_id__in=projects__ids, + deleted=None) + else: + # Add-on storage + base_file_nodes = BaseFileNode.objects.filter( + type='osf.{}file'.format(self.source.provider_name), + provider=self.source.provider_name, + target_object_id__in=projects__ids, + deleted=None) + total_size = 0 total_file = 0 files = [] # get file information for file in base_file_nodes: + file_provider = file.provider file_info = { 'id': file.id, 'path': file.path, 'materialized_path': file.materialized_path, 'name': file.name, - 'provider': file.provider, + 'provider': file_provider, 'created_at': file.created.strftime('%Y-%m-%d %H:%M:%S'), 'modified_at': file.modified.strftime('%Y-%m-%d %H:%M:%S'), 'project': {}, @@ -204,7 +237,6 @@ def extract_file_information_json_from_source_storage(self): # project project = file.target - source_project_ids.add(project.id) project_info = { 'id': project._id, 'name': project.title, @@ -235,28 +267,97 @@ def extract_file_information_json_from_source_storage(self): } file_info['timestamp'] = timestamp_info - # file versions - file_versions = file.versions.order_by('-created') - file_versions_info = [] - for version in file_versions: - file_version_thru = version.get_basefilenode_version(file) - version_info = { - 'identifier': version.identifier, - 'created_at': version.created.strftime('%Y-%m-%d %H:%M:%S'), - 'modified_at': version.modified.strftime('%Y-%m-%d %H:%M:%S'), - 'size': version.size, - 'version_name': file_version_thru.version_name if file_version_thru else file.name, - 'contributor': version.creator.username, - 'metadata': version.metadata, - 'location': version.location, - } - file_versions_info.append(version_info) - total_file += 1 - total_size += version.size - - file_info['version'] = file_versions_info - file_info['size'] = file_versions_info[-1]['size'] - file_info['location'] = file_versions_info[-1]['location'] + if file_provider == 'osfstorage': + # file versions + file_versions = file.versions.order_by('-created') + file_versions_info = [] + for version in file_versions: + file_version_thru = version.get_basefilenode_version(file) + version_info = { + 'identifier': version.identifier, + 'created_at': version.created.strftime('%Y-%m-%d %H:%M:%S'), + 'modified_at': version.modified.strftime('%Y-%m-%d %H:%M:%S'), + 'size': version.size, + 'version_name': file_version_thru.version_name if file_version_thru else file.name, + 'contributor': version.creator.username, + 'metadata': version.metadata, + 'location': version.location, + } + file_versions_info.append(version_info) + total_file += 1 + total_size += version.size + + file_info['version'] = file_versions_info + if file_versions_info: + file_info['size'] = file_versions_info[-1]['size'] + file_info['location'] = file_versions_info[-1]['location'] + else: + file_version_url = waterbutler_api_url_for( + file.target._id, file_provider, file.path, _internal=True, versions='', **kwargs + ) + file_versions_res = requests.get(file_version_url) + if file_versions_res.status_code != 200: + continue + + # Get file versions + file_versions = file_versions_res.json().get('data', []) + file_versions_info = [] + + for version in file_versions: + version_attributes = version.get('attributes', {}) + version_identifier = version_attributes.get('version') + version_info = { + 'identifier': version_identifier, + 'contributor': '', # External storage does not store who really uploaded file + 'location': {}, + } + + # Get metadata with file version + metadata_url = waterbutler_api_url_for( + file.target._id, file_provider, file.path, _internal=True, meta='', version=version_identifier, **kwargs + ) + metadata_res = requests.get(metadata_url) + if metadata_res.status_code != 200: + continue + + metadata_data = metadata_res.json().get('data', {}) + metadata_attributes = metadata_data.get('attributes', {}) + metadata_extra = metadata_attributes.get('extra', {}) + + sha256 = get_hashes_from_metadata(file_provider, metadata_extra, 'sha256') + md5 = get_hashes_from_metadata(file_provider, metadata_extra, 'md5') + sha1 = get_hashes_from_metadata(file_provider, metadata_extra, 'sha1') + sha512 = get_hashes_from_metadata(file_provider, metadata_extra, 'sha512') + if sha256 is not None: + metadata_attributes['sha256'] = sha256 + if md5 is not None: + metadata_attributes['md5'] = md5 + if sha1 is not None: + metadata_attributes['sha1'] = sha1 + if sha512 is not None: + metadata_attributes['sha512'] = sha512 + version_info['version_name'] = metadata_attributes.get('name', file.name) + version_info['created_at'] = metadata_attributes.get('created_utc') + version_info['size'] = metadata_attributes.get('sizeInt') + version_info['modified_at'] = metadata_attributes.get('modified_utc', metadata_attributes.get('modified')) + if file_provider == 'onedrivebusiness': + # Get quick XOR hash + quick_xor_hash = get_hashes_from_metadata(file_provider, metadata_extra, 'quickXorHash') + metadata_attributes['quickXorHash'] = quick_xor_hash + # OneDrive Business does not keep old version info in metadata API, get some info from version API instead + version_extra = version_attributes.get('extra', {}) + version_info['size'] = version_extra.get('size') + version_info['modified_at'] = version_attributes.get('modified_utc', version_attributes.get('modified')) + version_info['metadata'] = metadata_attributes + + total_file += 1 + total_size += version_info['size'] + file_versions_info.append(version_info) + + file_info['version'] = file_versions_info + if file_versions_info: + file_info['size'] = file_versions_info[-1]['size'] + file_info['location'] = file_versions_info[-1]['location'] files.append(file_info) file_info_json['folders'] = folders @@ -276,11 +377,23 @@ def get_source_file_versions_min(self, file_info_json): file_path = file.get('path') versions = file.get('version', []) file_id = file.get('id') - for version in versions: + for index, version in enumerate(versions): identifier = version.get('identifier') + modified_at = version.get('modified_at') + if identifier == 'null' and provider == 'ociinstitutions': + # OCI for Institutions: fix download error if version is latest + identifier = None + if index == 0 and provider == 'nextcloudinstitutions': + # Nextcloud for Institutions: fix download error if version is latest + identifier = None metadata = version.get('metadata') # get metadata.get('sha256', metadata.get('md5', metadata.get('sha512', metadata.get('sha1', metadata.get('name'))))) - file_name = metadata.get('sha256', metadata.get('md5')) + file_name = metadata.get('sha256', metadata.get('md5', metadata.get('sha512', metadata.get('sha1')))) + if provider == 'onedrivebusiness': + # OneDrive Business: get new hash based on quickXorHash and file version modified time + quick_xor_hash = metadata.get('quickXorHash') + new_string_to_hash = f'{quick_xor_hash}{modified_at}' + file_name = hashlib.sha256(new_string_to_hash.encode('utf-8')).hexdigest() file_versions.append((project_id, provider, file_path, identifier, file_name, file_id,)) return file_versions diff --git a/osf/models/export_data_restore.py b/osf/models/export_data_restore.py index b8265905996..2b81563e4cf 100644 --- a/osf/models/export_data_restore.py +++ b/osf/models/export_data_restore.py @@ -16,7 +16,8 @@ RdmFileTimestamptokenVerifyResult, AbstractNode, ) -from osf.models.export_data import SecondDateTimeField +from osf.models.export_data import SecondDateTimeField, get_hashes_from_metadata +from website.settings import INSTITUTIONAL_STORAGE_BULK_MOUNT_METHOD logger = logging.getLogger(__name__) @@ -44,7 +45,7 @@ def __repr__(self): __str__ = __repr__ - def extract_file_information_json_from_destination_storage(self): + def extract_file_information_json_from_destination_storage(self, **kwargs): # Get region guid == institution guid destination_storage_guid = self.destination.guid @@ -78,15 +79,6 @@ def extract_file_information_json_from_destination_storage(self): 'institution': institution_json, } - # get list FileVersion linked to destination storage - file_versions = self.destination.fileversion_set.all() - # but the creator must be affiliated with current institution - file_versions = file_versions.filter(creator__affiliated_institutions___id=destination_storage_guid) - - # get base_file_nodes__ids by file_versions__ids above via the BaseFileVersionsThrough model - base_file_versions_set = BaseFileVersionsThrough.objects.filter(fileversion__in=file_versions) - base_file_nodes__ids = base_file_versions_set.values_list('basefilenode_id', flat=True).distinct('basefilenode_id') - # get project list, includes public/private/deleted projects projects = institution.nodes.filter(type='osf.node', is_deleted=False) institution_users = institution.osfuser_set.all() @@ -94,25 +86,65 @@ def extract_file_information_json_from_destination_storage(self): # Combine two project lists and remove duplicates if have projects = projects.union(institution_users_projects) projects__ids = projects.values_list('id', flat=True) - destination_project_ids = set() - # get base_file_nodes - base_file_nodes = BaseFileNode.objects.filter( - id__in=base_file_nodes__ids, + # get folder nodes + base_folder_nodes = BaseFileNode.objects.filter( + # type='osf.{}folder'.format(self.source.provider_short_name), + type__endswith='folder', target_object_id__in=projects__ids, deleted=None) + folders = [] + for folder in base_folder_nodes: + folder_info = { + 'path': folder.path, + 'materialized_path': folder.materialized_path, + 'project': {} + } + # project + project = folder.target + project_info = { + 'id': project._id, + 'name': project.title, + } + folder_info['project'] = project_info + folders.append(folder_info) + + if self.destination.provider_name in INSTITUTIONAL_STORAGE_BULK_MOUNT_METHOD: + # Bulk-mount storage + # get list FileVersion linked to destination storage + file_versions = self.destination.fileversion_set.all() + # but the creator must be affiliated with current institution + file_versions = file_versions.filter(creator__affiliated_institutions___id=destination_storage_guid) + + # get base_file_nodes__ids by file_versions__ids above via the BaseFileVersionsThrough model + base_file_versions_set = BaseFileVersionsThrough.objects.filter(fileversion__in=file_versions) + base_file_nodes__ids = base_file_versions_set.values_list('basefilenode_id', flat=True).distinct('basefilenode_id') + + # get base_file_nodes + base_file_nodes = BaseFileNode.objects.filter( + id__in=base_file_nodes__ids, + target_object_id__in=projects__ids, + deleted=None) + else: + # Add-on storage + base_file_nodes = BaseFileNode.objects.filter( + type='osf.{}file'.format(self.destination.provider_name), + provider=self.destination.provider_name, + target_object_id__in=projects__ids, + deleted=None) total_size = 0 total_file = 0 files = [] # get file information for file in base_file_nodes: + file_provider = file.provider file_info = { 'id': file.id, 'path': file.path, 'materialized_path': file.materialized_path, 'name': file.name, - 'provider': file.provider, + 'provider': file_provider, 'created_at': file.created.strftime('%Y-%m-%d %H:%M:%S'), 'modified_at': file.modified.strftime('%Y-%m-%d %H:%M:%S'), 'project': {}, @@ -126,7 +158,6 @@ def extract_file_information_json_from_destination_storage(self): # project project = file.target - destination_project_ids.add(project.id) project_info = { 'id': project._id, 'name': project.title, @@ -157,35 +188,106 @@ def extract_file_information_json_from_destination_storage(self): } file_info['timestamp'] = timestamp_info - # file versions - file_versions = file.versions.order_by('-created') - file_versions_info = [] - for version in file_versions: - file_version_thru = version.get_basefilenode_version(file) - version_info = { - 'identifier': version.identifier, - 'created_at': version.created.strftime('%Y-%m-%d %H:%M:%S'), - 'modified_at': version.modified.strftime('%Y-%m-%d %H:%M:%S'), - 'size': version.size, - 'version_name': file_version_thru.version_name if file_version_thru else file.name, - 'contributor': version.creator.username, - 'metadata': version.metadata, - 'location': version.location, - } - file_versions_info.append(version_info) - total_file += 1 - total_size += version.size - - file_info['version'] = file_versions_info - file_info['size'] = file_versions_info[-1]['size'] - file_info['location'] = file_versions_info[-1]['location'] + if file_provider == 'osfstorage': + # file versions + file_versions = file.versions.order_by('-created') + file_versions_info = [] + for version in file_versions: + file_version_thru = version.get_basefilenode_version(file) + version_info = { + 'identifier': version.identifier, + 'created_at': version.created.strftime('%Y-%m-%d %H:%M:%S'), + 'modified_at': version.modified.strftime('%Y-%m-%d %H:%M:%S'), + 'size': version.size, + 'version_name': file_version_thru.version_name if file_version_thru else file.name, + 'contributor': version.creator.username, + 'metadata': version.metadata, + 'location': version.location, + } + file_versions_info.append(version_info) + total_file += 1 + total_size += version.size + + file_info['version'] = file_versions_info + if file_versions_info: + file_info['size'] = file_versions_info[-1]['size'] + file_info['location'] = file_versions_info[-1]['location'] + else: + file_version_url = waterbutler_api_url_for( + file.target._id, file_provider, file.path, _internal=True, versions='', **kwargs + ) + file_versions_res = requests.get(file_version_url) + if file_versions_res.status_code != 200: + continue + + # Get file versions + file_versions = file_versions_res.json().get('data', []) + file_versions_info = [] + + for version in file_versions: + version_attributes = version.get('attributes', {}) + version_identifier = version_attributes.get('version') + version_info = { + 'identifier': version_identifier, + 'contributor': '', # External storage does not store who really uploaded file + 'location': {}, + } + + # Get metadata with file version + metadata_url = waterbutler_api_url_for( + file.target._id, file_provider, file.path, _internal=True, meta='', version=version_identifier, **kwargs + ) + metadata_res = requests.get(metadata_url) + if metadata_res.status_code != 200: + continue + + metadata_data = metadata_res.json().get('data', {}) + metadata_attributes = metadata_data.get('attributes', {}) + metadata_extra = metadata_attributes.get('extra', {}) + + sha256 = get_hashes_from_metadata(file_provider, metadata_extra, 'sha256') + md5 = get_hashes_from_metadata(file_provider, metadata_extra, 'md5') + sha1 = get_hashes_from_metadata(file_provider, metadata_extra, 'sha1') + sha512 = get_hashes_from_metadata(file_provider, metadata_extra, 'sha512') + if sha256 is not None: + metadata_attributes['sha256'] = sha256 + if md5 is not None: + metadata_attributes['md5'] = md5 + if sha1 is not None: + metadata_attributes['sha1'] = sha1 + if sha512 is not None: + metadata_attributes['sha512'] = sha512 + version_info['version_name'] = metadata_attributes.get('name', file.name) + version_info['created_at'] = metadata_attributes.get('created_utc') + version_info['size'] = metadata_attributes.get('sizeInt') + version_info['modified_at'] = metadata_attributes.get('modified_utc', metadata_attributes.get('modified')) + if file_provider == 'onedrivebusiness': + # Get quick XOR hash + quick_xor_hash = get_hashes_from_metadata(file_provider, metadata_extra, 'quickXorHash') + metadata_attributes['quickXorHash'] = quick_xor_hash + # OneDrive Business does not keep old version info in metadata API, get some info from version API instead + version_extra = version_attributes.get('extra', {}) + version_info['metadata'] = metadata_attributes + version_info['size'] = version_extra.get('size') + version_info['modified_at'] = version_attributes.get('modified_utc', version_attributes.get('modified')) + version_info['metadata'] = metadata_attributes + + total_file += 1 + total_size += version_info['size'] + file_versions_info.append(version_info) + + file_info['version'] = file_versions_info + if file_versions_info: + file_info['size'] = file_versions_info[-1]['size'] + file_info['location'] = file_versions_info[-1]['location'] files.append(file_info) + file_info_json['folders'] = folders file_info_json['files'] = files export_data_json['files_numb'] = total_file export_data_json['size'] = total_size - export_data_json['projects_numb'] = len(destination_project_ids) + export_data_json['projects_numb'] = len(projects__ids) return export_data_json, file_info_json diff --git a/osf_tests/factories.py b/osf_tests/factories.py index c17f354d7eb..d48eee67d31 100644 --- a/osf_tests/factories.py +++ b/osf_tests/factories.py @@ -1041,6 +1041,7 @@ class Meta: 'fileaccess_token': 'file_abc', } } + addon_waterbutler_settings = { 'storage': { 'provider': 'nextcloudinstitutions', @@ -1065,6 +1066,28 @@ class Meta: waterbutler_url = 'http://123.456.test.woo' +class RegionFactoryInstitutionalAddon(DjangoModelFactory): + class Meta: + model = Region + + name = factory.Sequence(lambda n: 'Region2 {0}'.format(n)) + _id = factory.Sequence(lambda n: 'us_east2_{0}'.format(n)) + waterbutler_credentials = generic_waterbutler_credentials + waterbutler_settings = addon_waterbutler_settings + waterbutler_url = 'http://123.456.test.woo' + + +class RegionFactoryInstitutionalBulkMount(DjangoModelFactory): + class Meta: + model = Region + + name = factory.Sequence(lambda n: 'Region3 {0}'.format(n)) + _id = factory.Sequence(lambda n: 'us_east3_{0}'.format(n)) + waterbutler_credentials = generic_waterbutler_credentials + waterbutler_settings = bulkmount_waterbutler_settings + waterbutler_url = 'http://123.456.test.woo' + + class ProviderAssetFileFactory(DjangoModelFactory): class Meta: model = models.ProviderAssetFile @@ -1165,6 +1188,18 @@ class Meta: creator = factory.SubFactory(UserFactory) +class ExportDataFactoryAddon(DjangoModelFactory): + class Meta: + model = models.ExportData + + location = factory.SubFactory(ExportDataLocationFactory) + source = factory.SubFactory(RegionFactoryInstitutionalAddon) + process_start = datetime.datetime.now() + is_deleted = False + status = models.ExportData.STATUS_COMPLETED + creator = factory.SubFactory(UserFactory) + + class ExportDataRestoreFactory(DjangoModelFactory): class Meta: model = models.ExportDataRestore @@ -1176,6 +1211,28 @@ class Meta: creator = factory.SubFactory(UserFactory) +class ExportDataRestoreBulkMountFactory(DjangoModelFactory): + class Meta: + model = models.ExportDataRestore + + export = factory.SubFactory(ExportDataFactory) + destination = factory.SubFactory(RegionFactoryInstitutionalBulkMount) + process_start = datetime.datetime.now() + status = models.ExportData.STATUS_COMPLETED + creator = factory.SubFactory(UserFactory) + + +class ExportDataRestoreAddonFactory(DjangoModelFactory): + class Meta: + model = models.ExportDataRestore + + export = factory.SubFactory(ExportDataFactory) + destination = factory.SubFactory(RegionFactoryInstitutionalAddon) + process_start = datetime.datetime.now() + status = models.ExportData.STATUS_COMPLETED + creator = factory.SubFactory(UserFactory) + + class ContentTypeFactory(DjangoModelFactory): class Meta: model = ContentType @@ -1212,9 +1269,9 @@ class Meta: id = 1 provider = 'osfstorage' - path = 'fake_path' + path = '/fake_path' name = factory.Faker('company') - + type = 'osf.osfstoragefile' class BaseFileVersionsThroughFactory(DjangoModelFactory): class Meta: diff --git a/osf_tests/test_export_data.py b/osf_tests/test_export_data.py index 1c75f2cdde3..4253adb583a 100644 --- a/osf_tests/test_export_data.py +++ b/osf_tests/test_export_data.py @@ -11,6 +11,7 @@ from osf.models.export_data import DateTruncMixin from osf_tests.factories import ( ExportDataFactory, + ExportDataFactoryAddon, InstitutionFactory, ProjectFactory, OsfStorageFileFactory, @@ -19,7 +20,9 @@ ExportDataRestoreFactory, RegionFactory, bulkmount_waterbutler_settings, + BaseFileNodeFactory, ) +from osf.models.export_data import get_hashes_from_metadata FAKE_DATA = { 'institution': { @@ -370,6 +373,91 @@ def test_extract_file_information_json_from_source_storage_not_institution(self) result = self.export_data.extract_file_information_json_from_source_storage() nt.assert_is_none(result) + def test_get_hashes_from_metadata(self): + result = get_hashes_from_metadata(provider_name='s3', extra={'hashes': {'md5': 'test'}}, hash_name='md5') + nt.assert_is_not_none(result) + + def test_get_hashes_from_metadata_dropboxbusiness(self): + result = get_hashes_from_metadata(provider_name='dropboxbusiness', extra={'hashes': {'dropboxbusiness': 'test'}}, hash_name='sha256') + nt.assert_is_not_none(result) + + def test_get_hashes_from_metadata_dropboxbusiness_dict(self): + result = get_hashes_from_metadata(provider_name='dropboxbusiness', extra={'hashes': {'dropboxbusiness': {'md5': 'test'}}}, hash_name='md5') + nt.assert_is_not_none(result) + + +@pytest.mark.django_db +class TestExportDataInstitutionAddon(TestCase): + @classmethod + def setUpTestData(cls): + cls.export_data = ExportDataFactoryAddon() + cls.institution = InstitutionFactory.create(_id='vcu') + project = ProjectFactory() + cls.institution = InstitutionFactory.create(_id=cls.export_data.source.guid) + cls.institution.nodes.set([project]) + cls.institution_json = { + 'id': cls.institution.id, + 'guid': cls.institution.guid, + 'name': cls.institution.name + } + cls.export_data_json = { + 'institution': cls.institution_json, + 'process_start': cls.export_data.process_start.strftime('%Y-%m-%d %H:%M:%S'), + 'process_end': cls.export_data.process_end.strftime( + '%Y-%m-%d %H:%M:%S') if cls.export_data.process_end else None, + 'storage': { + 'name': cls.export_data.source.name, + 'type': cls.export_data.source.provider_full_name + }, + 'projects_numb': 1, + 'files_numb': 1, + 'size': -1, + 'file_path': None + } + + projects = cls.institution.nodes.filter(type='osf.node') + projects__ids = projects.values_list('id', flat=True) + object_id = projects__ids[0] + target = AbstractNode(id=object_id) + node = BaseFileNodeFactory.create(provider=cls.export_data.source.provider_name, target_object_id=object_id, target=target) + cls.file = node + + def test_extract_file_information_json_from_source_institutional_addon_storage(self): + mock_request = mock.MagicMock() + mock_request_json = mock.MagicMock() + mock_request_json.status_code = 200 + mock_request_json.json.side_effect = [{'data': [{'attributes': {'version': 1}}, {'attributes': {'version': 2}}]}, + {'data': {'attributes': {'sizeInt': 1, 'name': 'test', 'extra': {'hashes': {'md5': 'test'}}}}}, + {'data': {'attributes': {'sizeInt': 1, 'name': 'test', 'extra': {'hashes': {'sha256': 'test'}}}}}] + mock_request.get.return_value = mock_request_json + + with mock.patch('osf.models.export_data.requests', mock_request): + result = self.export_data.extract_file_information_json_from_source_storage() + nt.assert_is_not_none(result) + + def test_extract_file_information_json_from_source_institutional_addon_storage_onedrivebusiness(self): + mock_request = mock.MagicMock() + mock_request_json = mock.MagicMock() + mock_request_json.status_code = 200 + mock_request_json.json.side_effect = [{'data': [{'attributes': {'version': 1}}]}, + {'data': {'attributes': {'sizeInt': 1, 'name': 'test', 'etag': 'test', 'extra': {'hashes': {}}}}}, + {'content': 'test'}] + mock_request.get.return_value = mock_request_json + + with mock.patch('osf.models.export_data.requests', mock_request): + result = self.export_data.extract_file_information_json_from_source_storage() + nt.assert_is_not_none(result) + + def test_extract_file_information_json_from_source_institutional_addon_storage_get_file_version_error(self): + mock_request = mock.MagicMock() + mock_request_json = mock.MagicMock() + mock_request_json.status_code = 404 + mock_request.get.return_value = mock_request_json + + with mock.patch('osf.models.export_data.requests', mock_request): + result = self.export_data.extract_file_information_json_from_source_storage() + nt.assert_is_not_none(result) + @pytest.mark.feature_202210 @pytest.mark.django_db @@ -427,3 +515,7 @@ def test_truncate_date(self): fake_data = 'fake_value' res = self.date_mixin.truncate_date(fake_data) nt.assert_equal(res, fake_data) + + def test_truncate_date_none_value(self): + res = self.date_mixin.truncate_date(None) + nt.assert_is_none(res) diff --git a/osf_tests/test_export_data_restore.py b/osf_tests/test_export_data_restore.py index 80b72822cb7..244dc7f417e 100644 --- a/osf_tests/test_export_data_restore.py +++ b/osf_tests/test_export_data_restore.py @@ -1,6 +1,7 @@ import copy from datetime import datetime +import mock import pytest import requests from django.test import TestCase @@ -15,9 +16,11 @@ ProjectFactory, InstitutionFactory, OsfStorageFileFactory, - ExportDataRestoreFactory, + ExportDataRestoreBulkMountFactory, + ExportDataRestoreAddonFactory, BaseFileVersionsThroughFactory, RdmFileTimestamptokenVerifyResultFactory, + BaseFileNodeFactory ) @@ -26,7 +29,7 @@ class TestExportDataRestore(TestCase): @classmethod def setUpTestData(cls): - cls.data_restore = ExportDataRestoreFactory() + cls.data_restore = ExportDataRestoreBulkMountFactory() project = ProjectFactory() cls.institution = InstitutionFactory.create(_id=cls.data_restore.destination.guid) cls.institution.nodes.set([project]) @@ -78,7 +81,7 @@ def setUpTestData(cls): 'version': [{ 'identifier': file_version.identifier, 'created_at': file_version.created.strftime('%Y-%m-%d %H:%M:%S'), - 'modified_at': file_version.created.strftime('%Y-%m-%d %H:%M:%S'), + 'modified_at': file_version.modified.strftime('%Y-%m-%d %H:%M:%S'), 'size': file_version.size, 'version_name': file_versions_through.version_name if file_versions_through else node.name, 'contributor': file_version.creator.username, @@ -122,7 +125,7 @@ def test_extract_file_information_json_from_destination_storage(self): nt.assert_equal(file_info_first_file.get('timestamp'), test_file_info_file.get('timestamp')) def test_extract_file_information_json_from_destination_storage_institution_not_found(self): - export_data_restore_without_institution = ExportDataRestoreFactory.build() + export_data_restore_without_institution = ExportDataRestoreBulkMountFactory.build() result = export_data_restore_without_institution.extract_file_information_json_from_destination_storage() nt.assert_is_none(result) @@ -202,3 +205,75 @@ def test_update(self): current_datetime = datetime.now() self.data_restore.update(process_end=current_datetime) nt.assert_equal(self.data_restore.process_end, current_datetime) + + +@pytest.mark.django_db +class TestExportDataRestoreInstitutionAddon(TestCase): + @classmethod + def setUpTestData(cls): + cls.data_restore = ExportDataRestoreAddonFactory() + project = ProjectFactory() + cls.institution = InstitutionFactory.create(_id=cls.data_restore.destination.guid) + cls.institution.nodes.set([project]) + cls.institution_json = { + 'id': cls.institution.id, + 'guid': cls.institution.guid, + 'name': cls.institution.name + } + cls.export_data_json = { + 'institution': cls.institution_json, + 'process_start': cls.data_restore.process_start.strftime('%Y-%m-%d %H:%M:%S'), + 'process_end': cls.data_restore.process_end.strftime( + '%Y-%m-%d %H:%M:%S') if cls.data_restore.process_end else None, + 'storage': { + 'name': cls.data_restore.destination.name, + 'type': cls.data_restore.destination.provider_full_name + }, + 'projects_numb': 1, + 'files_numb': 1, + 'size': -1, + 'file_path': None + } + + projects = cls.institution.nodes.filter(type='osf.node') + projects__ids = projects.values_list('id', flat=True) + object_id = projects__ids[0] + target = AbstractNode(id=object_id) + node = BaseFileNodeFactory.create(provider=cls.data_restore.destination.provider_name, target_object_id=object_id, target=target) + cls.file = node + + def test_extract_file_information_json_from_destination_institutional_addon_storage(self): + mock_request = mock.MagicMock() + mock_request_json = mock.MagicMock() + mock_request_json.status_code = 200 + mock_request_json.json.side_effect = [{'data': [{'attributes': {'version': 1}}, {'attributes': {'version': 2}}]}, + {'data': {'attributes': {'sizeInt': 1, 'name': 'test', 'extra': {'hashes': {'md5': 'test'}}}}}, + {'data': {'attributes': {'sizeInt': 1, 'name': 'test', 'etag': 'test', 'extra': {'hashes': {}}}}}] + mock_request.get.return_value = mock_request_json + + with mock.patch('osf.models.export_data_restore.requests', mock_request): + result = self.data_restore.extract_file_information_json_from_destination_storage() + nt.assert_is_not_none(result) + + def test_extract_file_information_json_from_destination_institutional_onedrivebusiness(self): + mock_request = mock.MagicMock() + mock_request_json = mock.MagicMock() + mock_request_json.status_code = 200 + mock_request_json.json.side_effect = [{'data': [{'attributes': {'version': 1}}]}, + {'data': {'attributes': {'sizeInt': 1, 'name': 'test', 'etag': 'test', 'extra': {'hashes': {}}}}}, + {'content': 'test'}] + mock_request.get.return_value = mock_request_json + + with mock.patch('osf.models.export_data_restore.requests', mock_request): + result = self.data_restore.extract_file_information_json_from_destination_storage() + nt.assert_is_not_none(result) + + def test_extract_file_information_json_from_destination_institutional_addon_storage_get_file_version_error(self): + mock_request = mock.MagicMock() + mock_request_json = mock.MagicMock() + mock_request_json.status_code = 404 + mock_request.get.return_value = mock_request_json + + with mock.patch('osf.models.export_data_restore.requests', mock_request): + result = self.data_restore.extract_file_information_json_from_destination_storage() + nt.assert_is_not_none(result) From 68b006a4337723ff1990741314f487b7562cfe6f Mon Sep 17 00:00:00 2001 From: huanphan-tma Date: Thu, 3 Aug 2023 18:58:15 +0700 Subject: [PATCH 02/31] =?UTF-8?q?ref=20[Bug][NII=20Redmine#37211]=E6=A9=9F?= =?UTF-8?q?=E9=96=A2=E3=82=B9=E3=83=88=E3=83=AC=E3=83=BC=E3=82=B8(?= =?UTF-8?q?=E3=82=A2=E3=83=89=E3=82=AA=E3=83=B3=E6=96=B9=E5=BC=8F)?= =?UTF-8?q?=E3=81=AB=E3=81=8A=E3=81=91=E3=82=8B=E3=82=A8=E3=82=AF=E3=82=B9?= =?UTF-8?q?=E3=83=9D=E3=83=BC=E3=83=88=E3=83=BB=E3=83=AA=E3=82=B9=E3=83=88?= =?UTF-8?q?=E3=82=A2=E5=87=A6=E7=90=86=E3=81=AE=E6=AE=8B=E8=AA=B2=E9=A1=8C?= =?UTF-8?q?=E3=81=AB=E3=81=A4=E3=81=84=E3=81=A6:=20Implement=20fix=20for?= =?UTF-8?q?=20setting=20up=20Oracle=20Cloud=20and=20implement=20update=20a?= =?UTF-8?q?nd=20path=20in=20addons=5Fmetadata=5Ffilemetadata=20after=20res?= =?UTF-8?q?tore=20process?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- addons/s3compatb3/utils.py | 6 +- .../export_data/utils.py | 75 +++++ .../export_data/views/management.py | 4 + .../export_data/views/restore.py | 17 +- .../export_data/test_utils.py | 261 +++++++++++++++++- osf/models/export_data.py | 4 +- osf/models/export_data_restore.py | 4 +- 7 files changed, 359 insertions(+), 12 deletions(-) diff --git a/addons/s3compatb3/utils.py b/addons/s3compatb3/utils.py index b755a186340..1c004549ee9 100644 --- a/addons/s3compatb3/utils.py +++ b/addons/s3compatb3/utils.py @@ -177,8 +177,10 @@ def get_user_info(host, access_key, secret_key): connection = connect_s3compatb3(host, access_key, secret_key) buckets = connection.buckets.all() [bucket.name for bucket in buckets] - identity = boto3.client('sts').get_caller_identity() - return identity + return connection + # Fix for not being able to setup Oracle Cloud Infrastructure for Institutions + # identity = boto3.client('sts').get_caller_identity() + # return identity except ClientError: return None return None diff --git a/admin/rdm_custom_storage_location/export_data/utils.py b/admin/rdm_custom_storage_location/export_data/utils.py index 6203806d556..e5537eced17 100644 --- a/admin/rdm_custom_storage_location/export_data/utils.py +++ b/admin/rdm_custom_storage_location/export_data/utils.py @@ -13,6 +13,7 @@ from addons.base.institutions_utils import KEYNAME_BASE_FOLDER from addons.dropboxbusiness import utils as dropboxbusiness_utils +from addons.metadata.models import FileMetadata from addons.nextcloudinstitutions import KEYNAME_NOTIFICATION_SECRET from addons.nextcloudinstitutions.models import NextcloudInstitutionsProvider from addons.osfstorage.models import Region @@ -50,6 +51,8 @@ 'write_json_file', 'check_diff_between_version', 'count_files_ng_ok', + 'is_add_on_storage', + 'check_file_metadata', ] ANY_BACKUP_FOLDER_REGEX = '^\\/backup_\\d{8,13}\\/.*$' @@ -429,6 +432,44 @@ def count_files_ng_ok(exported_file_versions, storage_file_versions, exclude_key return data +def check_file_metadata(data, restore_data, storage_file_info): + destination_region = restore_data.destination + destination_provider = destination_region.provider_name + if not is_add_on_storage(destination_provider): + destination_provider = 'osfstorage' + storage_files = storage_file_info.get('files', []) + list_file_ng = data.get('list_file_ng', []) + for file in storage_files: + file_materialized_path = file.get('materialized_path') + file_project_guid = file.get('project', {}).get('id') + file_provider = file.get('provider') + if not is_add_on_storage(file_provider): + file_provider = 'osfstorage' + project = AbstractNode.load(file_project_guid) + old_file_metadata_queryset = FileMetadata.objects.filter(project__owner=project, path=f'{file_provider}{file_materialized_path}', deleted=None) + restored_file_metadata_queryset = FileMetadata.objects.filter(project__owner=project, path=f'{destination_provider}{file_materialized_path}', deleted=None) + if not restored_file_metadata_queryset.exists() and old_file_metadata_queryset.exists(): + # Metadata path does not change, add to NG list + file_is_ng = False + for item in data.get('list_file_ng', []): + if item.get('path') == file_materialized_path: + # If file already has NG, add reason + file_is_ng = True + item['reason'] += '\nFile metadata is not updated' + if not file_is_ng: + # If file is not NG then add new record + data['ok'] -= 1 + data['ng'] += 1 + list_file_ng.append({ + 'path': file_materialized_path, + 'size': file.get('size'), + 'version_id': None, + 'reason': 'File metadata is not updated', + }) + data['list_file_ng'] = list_file_ng if len(list_file_ng) <= 10 else list_file_ng[:10] + return data + + def check_for_any_running_restore_process(destination_id): return ExportDataRestore.objects.filter(destination_id=destination_id).exclude( Q(status=ExportData.STATUS_STOPPED) | Q(status=ExportData.STATUS_COMPLETED) | Q(status=ExportData.STATUS_ERROR)).exists() @@ -1055,3 +1096,37 @@ def create_parent_folders_for_nextcloud_for_institutions(node_id, provider, path return {'error': 'Cannot create folder for Nextcloud for Institutions'} parent_path += folder_path return None + + +def update_file_metadata(project_guid, source_provider, destination_provider, file_path): + """ Update restored file path of addons_metadata_filemetadata """ + project = AbstractNode.load(project_guid) + if not project: + return + + old_metadata_path = f'{source_provider}{file_path}' + new_metadata_path = f'{destination_provider}{file_path}' + file_metadata_queryset = FileMetadata.objects.filter(project__owner=project, path=old_metadata_path, deleted=None) + if file_metadata_queryset.exists(): + file_metadata = file_metadata_queryset.first() + file_metadata.path = new_metadata_path + file_metadata.save() + + +def update_all_folders_metadata(institution, destination_provider): + """ Update folder path of addons_metadata_filemetadata """ + if not institution or is_add_on_storage(destination_provider) is None: + # If input is invalid then do nothing + return + + with transaction.atomic(): + institution_users = institution.osfuser_set.all() + project_queryset = AbstractNode.objects.filter(type='osf.node', is_deleted=False, creator__in=institution_users) + file_metadata_list = FileMetadata.objects.filter(folder=True, project__owner__in=project_queryset, deleted=None) + for file_metadata in file_metadata_list: + path = file_metadata.path + path_parts = path.split('/') + if len(path_parts) > 1: + path_parts[0] = destination_provider + file_metadata.path = '/'.join(path_parts) + file_metadata.save() diff --git a/admin/rdm_custom_storage_location/export_data/views/management.py b/admin/rdm_custom_storage_location/export_data/views/management.py index 45c70a4a9ab..1b73e0ea3a5 100644 --- a/admin/rdm_custom_storage_location/export_data/views/management.py +++ b/admin/rdm_custom_storage_location/export_data/views/management.py @@ -17,6 +17,7 @@ validate_exported_data, count_files_ng_ok, is_add_on_storage, + check_file_metadata, ) from osf.models import ExportData, Institution from website.util import inspect_info # noqa @@ -484,6 +485,9 @@ def get(self, request, data_id): 'etag', 'extra'] data = count_files_ng_ok(exported_file_versions, storage_file_versions, exclude_keys=exclude_keys) + # Check addons_metadata_filemetadata + data = check_file_metadata(data, restore_data, storage_file_info) + # end check restore_data.status = ExportData.STATUS_COMPLETED restore_data.save() diff --git a/admin/rdm_custom_storage_location/export_data/views/restore.py b/admin/rdm_custom_storage_location/export_data/views/restore.py index e714bd3f85b..76cd433a518 100644 --- a/admin/rdm_custom_storage_location/export_data/views/restore.py +++ b/admin/rdm_custom_storage_location/export_data/views/restore.py @@ -23,7 +23,7 @@ from admin.rdm_custom_storage_location import tasks from admin.rdm_custom_storage_location.export_data import utils from osf.models import ExportData, ExportDataRestore, BaseFileNode, Tag, RdmFileTimestamptokenVerifyResult, Institution, OSFUser, FileVersion, AbstractNode, \ - ProjectStorageType, UserQuota, Guid + ProjectStorageType, UserQuota from website.util import inspect_info # noqa from framework.transactions.handlers import no_auto_transaction from website.util.quota import update_user_used_quota @@ -229,6 +229,10 @@ def restore_export_data_process(task, cookies, export_id, export_data_restore_id # Add tags, timestamp to created file nodes add_tag_and_timestamp_to_database(task, current_process_step, list_created_file_nodes) + # Update metadata of folders + institution = Institution.load(destination_region.guid) + utils.update_all_folders_metadata(institution, destination_provider) + # Update process data with process_end timestamp and 'Completed' status export_data_restore.update(process_end=timezone.make_naive(timezone.now(), timezone.utc), status=ExportData.STATUS_COMPLETED) @@ -646,6 +650,9 @@ def copy_files_from_export_data_to_destination(task, current_process_step, expor # Create file node if not have for add-on storage utils.prepare_file_node_for_add_on_storage(file_project_id, destination_provider, file_materialized_path, **kwargs) + # update file metadata + utils.update_file_metadata(file_project_id, file_provider, destination_provider, file_materialized_path) + response_id = response_body.get('data', {}).get('id') response_file_version_id = response_body.get('data', {}).get('attributes', {}).get('extra', {}).get('version', version_id) if response_id.startswith('osfstorage'): @@ -706,16 +713,16 @@ def copy_files_from_export_data_to_destination(task, current_process_step, expor else: # If id is provider_name/[path] then get path file_path_splits = response_id.split('/') - if len(file_path_splits) >= 2: + if len(file_path_splits) > 1: file_path_splits[0] = '' file_node_path = '/'.join(file_path_splits) - project_id = Guid.objects.filter(_id=file_project_id).values_list('id', flat=True).first() - if project_id is None: + project = AbstractNode.load(file_project_id) + if not project: continue node_set = BaseFileNode.objects.filter( type='osf.{}file'.format(destination_provider), _path=file_node_path, - target_object_id=project_id, + target_object_id=project.id, deleted=None) if node_set.exists(): node = node_set.first() diff --git a/admin_tests/rdm_custom_storage_location/export_data/test_utils.py b/admin_tests/rdm_custom_storage_location/export_data/test_utils.py index dd8a51397e1..6dc3be0d502 100644 --- a/admin_tests/rdm_custom_storage_location/export_data/test_utils.py +++ b/admin_tests/rdm_custom_storage_location/export_data/test_utils.py @@ -11,6 +11,7 @@ from requests import ConnectionError from rest_framework import status +from addons.metadata.models import NodeSettings as MetadataNodeSettings, FileMetadata from addons.nextcloudinstitutions.models import NextcloudInstitutionsProvider from admin.rdm_custom_storage_location.export_data import utils from admin.rdm_custom_storage_location.export_data.views import management @@ -25,7 +26,7 @@ ExportDataFactory, ExportDataRestoreFactory, ProjectFactory, - DraftNodeFactory, + DraftNodeFactory, RegionFactory, bulkmount_waterbutler_settings, ) from tests.base import AdminTestCase from website.settings import INSTITUTIONAL_STORAGE_ADD_ON_METHOD @@ -1393,6 +1394,9 @@ def setUp(self): self.export_data = ExportDataFactory() self.export_data_restore = ExportDataRestoreFactory() self.export_data_restore.export = self.export_data + self.project = ProjectFactory(creator=self.user) + self.metadata_node_settings = MetadataNodeSettings(owner=self.project) + self.metadata_node_settings.save() def test_count_file_ng_ok(self): data_old = utils.process_data_information(FAKE_DATA['files']) @@ -1511,6 +1515,150 @@ def test_check_diff_between_dict(self): res = utils.deep_diff(a_new, a_standard, exclude_keys=['section1', 'section2']) nt.assert_not_equal(res, None) + # check_file_metadata + def test_check_file_metadata_not_set(self): + region = RegionFactory(waterbutler_settings=bulkmount_waterbutler_settings) + export_data_restore = ExportDataRestoreFactory(destination=region) + storage_file_info = { + 'files': [ + { + 'provider': 'dropboxbusiness', + 'materialized_path': '/test_file.txt', + 'project': { + 'id': self.project._id, + } + } + ] + } + data = { + 'ng': 0, + 'ok': 1, + 'total': 1, + 'list_file_ng': [], + } + result = utils.check_file_metadata(data, export_data_restore, storage_file_info) + nt.assert_equal(result.get('ng'), 0) + nt.assert_equal(result.get('ok'), 1) + nt.assert_equal(result.get('total'), 1) + nt.assert_equal(len(result.get('list_file_ng', [])), 0) + + def test_check_file_metadata_add_new_ng(self): + region = RegionFactory(waterbutler_settings=bulkmount_waterbutler_settings) + export_data_restore = ExportDataRestoreFactory(destination=region) + file_materialized_path = '/test_file.txt' + storage_file_info = { + 'files': [ + { + 'provider': 'dropboxbusiness', + 'materialized_path': file_materialized_path, + 'project': { + 'id': self.project._id, + } + } + ] + } + data = { + 'ng': 0, + 'ok': 1, + 'total': 1, + 'list_file_ng': [], + } + FileMetadata.objects.create( + creator=self.user, + user=self.user, + project=self.metadata_node_settings, + path=f'dropboxbusiness{file_materialized_path}', + hash='test_hash', + folder=False, + metadata={'items': []}, + ) + result = utils.check_file_metadata(data, export_data_restore, storage_file_info) + nt.assert_equal(result.get('ng'), 1) + nt.assert_equal(result.get('ok'), 0) + nt.assert_equal(result.get('total'), 1) + list_file_ng = result.get('list_file_ng', []) + nt.assert_equal(len(list_file_ng), 1) + nt.assert_equal(list_file_ng[0].get('reason'), 'File metadata is not updated') + + def test_check_file_metadata_update_ng(self): + region = RegionFactory(waterbutler_settings=bulkmount_waterbutler_settings) + export_data_restore = ExportDataRestoreFactory(destination=region) + file_materialized_path = '/test_file.txt' + storage_file_info = { + 'files': [ + { + 'provider': 'dropboxbusiness', + 'materialized_path': file_materialized_path, + 'project': { + 'id': self.project._id, + } + } + ] + } + data = { + 'ng': 1, + 'ok': 0, + 'total': 1, + 'list_file_ng': [{ + 'path': file_materialized_path, + 'size': 0, + 'version_id': 1, + 'reason': '"timestamp" not match', + }], + } + FileMetadata.objects.create( + creator=self.user, + user=self.user, + project=self.metadata_node_settings, + path=f'dropboxbusiness{file_materialized_path}', + hash='test_hash', + folder=False, + metadata={'items': []}, + ) + result = utils.check_file_metadata(data, export_data_restore, storage_file_info) + nt.assert_equal(result.get('ng'), 1) + nt.assert_equal(result.get('ok'), 0) + nt.assert_equal(result.get('total'), 1) + list_file_ng = result.get('list_file_ng', []) + nt.assert_equal(len(list_file_ng), 1) + nt.assert_equal(list_file_ng[0].get('reason'), '"timestamp" not match\nFile metadata is not updated') + + def test_check_file_metadata_ok(self): + region = RegionFactory(waterbutler_settings=bulkmount_waterbutler_settings) + export_data_restore = ExportDataRestoreFactory(destination=region) + file_materialized_path = '/test_file.txt' + storage_file_info = { + 'files': [ + { + 'provider': 'dropboxbusiness', + 'materialized_path': file_materialized_path, + 'project': { + 'id': self.project._id, + } + } + ] + } + data = { + 'ng': 0, + 'ok': 1, + 'total': 1, + 'list_file_ng': [], + } + FileMetadata.objects.create( + creator=self.user, + user=self.user, + project=self.metadata_node_settings, + path=f'osfstorage{file_materialized_path}', + hash='test_hash', + folder=False, + metadata={'items': []}, + ) + result = utils.check_file_metadata(data, export_data_restore, storage_file_info) + nt.assert_equal(result.get('ng'), 0) + nt.assert_equal(result.get('ok'), 1) + nt.assert_equal(result.get('total'), 1) + nt.assert_equal(len(result.get('list_file_ng', [])), 0) + @pytest.mark.feature_202210 class TestUtilsForRestoreData(AdminTestCase): @@ -3319,3 +3467,114 @@ def test_is_add_on_storage(self): nt.assert_false(utils.is_add_on_storage('owncloud')) nt.assert_false(utils.is_add_on_storage('s3compat')) nt.assert_false(utils.is_add_on_storage('s3')) + + # update_file_metadata + def test_update_file_metadata_project_not_found(self): + user = AuthUserFactory() + project = ProjectFactory() + metadata_node_settings = MetadataNodeSettings(owner=project) + metadata_node_settings.save() + source_provider = 'osfstorage' + destination_provider = 's3compatinstitutions' + file_path = '/test_file.txt' + FileMetadata.objects.create( + creator=user, + user=user, + project=metadata_node_settings, + path=f'{source_provider}{file_path}', + hash='test_hash', + folder=False, + metadata={'items': []} + ) + utils.update_file_metadata(None, source_provider, destination_provider, file_path) + new_metadata = FileMetadata.objects.filter(project=metadata_node_settings, path=f'{destination_provider}{file_path}') + nt.assert_false(new_metadata.exists()) + + def test_update_file_metadata_no_update(self): + user = AuthUserFactory() + project = ProjectFactory() + metadata_node_settings = MetadataNodeSettings(owner=project) + metadata_node_settings.save() + source_provider = 'dropboxbusiness' + destination_provider = 's3compatinstitutions' + file_path = '/test_file.txt' + FileMetadata.objects.create( + creator=user, + user=user, + project=metadata_node_settings, + path=f'{source_provider}{file_path}', + hash='test_hash', + folder=False, + metadata={'items': []} + ) + utils.update_file_metadata(None, 'osfstorage', destination_provider, file_path) + new_metadata = FileMetadata.objects.filter(project=metadata_node_settings, path=f'{destination_provider}{file_path}') + nt.assert_false(new_metadata.exists()) + + def test_update_file_metadata(self): + user = AuthUserFactory() + project = ProjectFactory() + metadata_node_settings = MetadataNodeSettings(owner=project) + metadata_node_settings.save() + source_provider = 'osfstorage' + destination_provider = 's3compatinstitutions' + file_path = '/test_file.txt' + FileMetadata.objects.create( + creator=user, + user=user, + project=metadata_node_settings, + path=f'{source_provider}{file_path}', + hash='test_hash', + folder=False, + metadata={'items': []}, + ) + utils.update_file_metadata(project._id, source_provider, destination_provider, file_path) + new_metadata = FileMetadata.objects.filter(project=metadata_node_settings, path=f'{destination_provider}{file_path}') + nt.assert_true(new_metadata.exists()) + + # update_all_folders_metadata + def test_update_all_folders_metadata_invalid_input(self): + user = AuthUserFactory() + project = ProjectFactory() + metadata_node_settings = MetadataNodeSettings(owner=project) + metadata_node_settings.save() + source_provider = 'osfstorage' + destination_provider = 's3compatinstitutions' + folder_path = '/test_folder/' + FileMetadata.objects.create( + creator=user, + user=user, + project=metadata_node_settings, + path=f'{source_provider}{folder_path}', + hash='test_hash', + folder=True, + metadata={'items': []}, + ) + utils.update_all_folders_metadata(None, destination_provider) + new_metadata = FileMetadata.objects.filter(project=metadata_node_settings, folder=True, path=f'{destination_provider}{folder_path}') + nt.assert_false(new_metadata.exists()) + + def test_update_all_folders_metadata(self): + user = AuthUserFactory() + project = ProjectFactory(creator=user) + metadata_node_settings = MetadataNodeSettings(owner=project) + metadata_node_settings.save() + source_provider = 'osfstorage' + destination_provider = 's3compatinstitutions' + folder_path = '/test_folder/' + FileMetadata.objects.create( + creator=user, + user=user, + project=metadata_node_settings, + path=f'{source_provider}{folder_path}', + hash='test_hash', + folder=True, + metadata={'items': []}, + ) + institution = InstitutionFactory() + institution.nodes.set([project]) + institution.osfuser_set.add(user) + institution.save() + utils.update_all_folders_metadata(institution, destination_provider) + new_metadata = FileMetadata.objects.filter(project=metadata_node_settings, folder=True, path=f'{destination_provider}{folder_path}') + nt.assert_true(new_metadata.exists()) diff --git a/osf/models/export_data.py b/osf/models/export_data.py index 30e39696e7b..db93a9f08ca 100644 --- a/osf/models/export_data.py +++ b/osf/models/export_data.py @@ -205,9 +205,9 @@ def extract_file_information_json_from_source_storage(self, **kwargs): target_object_id__in=projects__ids, deleted=None) else: - # Add-on storage + # Add-on storage: get base_file_nodes based on type, provider name and project ids base_file_nodes = BaseFileNode.objects.filter( - type='osf.{}file'.format(self.source.provider_name), + type=f'osf.{self.source.provider_name}file', provider=self.source.provider_name, target_object_id__in=projects__ids, deleted=None) diff --git a/osf/models/export_data_restore.py b/osf/models/export_data_restore.py index 2b81563e4cf..42bb1e7e567 100644 --- a/osf/models/export_data_restore.py +++ b/osf/models/export_data_restore.py @@ -126,9 +126,9 @@ def extract_file_information_json_from_destination_storage(self, **kwargs): target_object_id__in=projects__ids, deleted=None) else: - # Add-on storage + # Add-on storage: get base_file_nodes based on type, provider name and project ids base_file_nodes = BaseFileNode.objects.filter( - type='osf.{}file'.format(self.destination.provider_name), + type=f'osf.{self.destination.provider_name}file', provider=self.destination.provider_name, target_object_id__in=projects__ids, deleted=None) From f746d33319352c6749a87bcc8db7b9c83b44f056 Mon Sep 17 00:00:00 2001 From: huanphan-tma Date: Fri, 4 Aug 2023 18:10:59 +0700 Subject: [PATCH 03/31] =?UTF-8?q?ref=20[Bug][NII=20Redmine#37211]=E6=A9=9F?= =?UTF-8?q?=E9=96=A2=E3=82=B9=E3=83=88=E3=83=AC=E3=83=BC=E3=82=B8(?= =?UTF-8?q?=E3=82=A2=E3=83=89=E3=82=AA=E3=83=B3=E6=96=B9=E5=BC=8F)?= =?UTF-8?q?=E3=81=AB=E3=81=8A=E3=81=91=E3=82=8B=E3=82=A8=E3=82=AF=E3=82=B9?= =?UTF-8?q?=E3=83=9D=E3=83=BC=E3=83=88=E3=83=BB=E3=83=AA=E3=82=B9=E3=83=88?= =?UTF-8?q?=E3=82=A2=E5=87=A6=E7=90=86=E3=81=AE=E6=AE=8B=E8=AA=B2=E9=A1=8C?= =?UTF-8?q?=E3=81=AB=E3=81=A4=E3=81=84=E3=81=A6:=20Exclude=20empty=20incom?= =?UTF-8?q?plete=20osf=5Fbasefilenode=20in=20export=20from=20add-on=20stor?= =?UTF-8?q?age=20and=20fix=20file=20render=20error=20on=20RDM=20web?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- osf/models/export_data.py | 1 + osf/models/export_data_restore.py | 1 + website/util/waterbutler.py | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/osf/models/export_data.py b/osf/models/export_data.py index db93a9f08ca..a71003896c4 100644 --- a/osf/models/export_data.py +++ b/osf/models/export_data.py @@ -210,6 +210,7 @@ def extract_file_information_json_from_source_storage(self, **kwargs): type=f'osf.{self.source.provider_name}file', provider=self.source.provider_name, target_object_id__in=projects__ids, + _materialized_path__isnull=False, deleted=None) total_size = 0 diff --git a/osf/models/export_data_restore.py b/osf/models/export_data_restore.py index 42bb1e7e567..f508275fc89 100644 --- a/osf/models/export_data_restore.py +++ b/osf/models/export_data_restore.py @@ -131,6 +131,7 @@ def extract_file_information_json_from_destination_storage(self, **kwargs): type=f'osf.{self.destination.provider_name}file', provider=self.destination.provider_name, target_object_id__in=projects__ids, + _materialized_path__isnull=False, deleted=None) total_size = 0 diff --git a/website/util/waterbutler.py b/website/util/waterbutler.py index 6bd3ce7f2bd..f0ccef22f70 100644 --- a/website/util/waterbutler.py +++ b/website/util/waterbutler.py @@ -24,7 +24,7 @@ def download_file(osf_cookie, file_node, download_path, **kwargs): try: response = requests.get( - file_node.generate_waterbutler_url(action='download', direct=None, **kwargs), + file_node.generate_waterbutler_url(action='download', direct=None, _internal=True, **kwargs), cookies={settings.COOKIE_NAME: osf_cookie}, stream=True ) From dd895196f021604bef0941af99d72d9e708fd815 Mon Sep 17 00:00:00 2001 From: huanphan-tma Date: Thu, 10 Aug 2023 18:50:10 +0700 Subject: [PATCH 04/31] =?UTF-8?q?ref=20[Bug][NII=20Redmine#37211]=E6=A9=9F?= =?UTF-8?q?=E9=96=A2=E3=82=B9=E3=83=88=E3=83=AC=E3=83=BC=E3=82=B8(?= =?UTF-8?q?=E3=82=A2=E3=83=89=E3=82=AA=E3=83=B3=E6=96=B9=E5=BC=8F)?= =?UTF-8?q?=E3=81=AB=E3=81=8A=E3=81=91=E3=82=8B=E3=82=A8=E3=82=AF=E3=82=B9?= =?UTF-8?q?=E3=83=9D=E3=83=BC=E3=83=88=E3=83=BB=E3=83=AA=E3=82=B9=E3=83=88?= =?UTF-8?q?=E3=82=A2=E5=87=A6=E7=90=86=E3=81=AE=E6=AE=8B=E8=AA=B2=E9=A1=8C?= =?UTF-8?q?=E3=81=AB=E3=81=A4=E3=81=84=E3=81=A6:=20Update=20check=20restor?= =?UTF-8?q?e=20exclude=20keys=20and=20update=20node=20settings=20for=20pro?= =?UTF-8?q?jects=20that=20has=20set=20the=20provider=20before?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../export_data/views/management.py | 7 +++++-- admin/rdm_custom_storage_location/utils.py | 20 ++++++++++++------- .../rdm_custom_storage_location/test_utils.py | 2 ++ 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/admin/rdm_custom_storage_location/export_data/views/management.py b/admin/rdm_custom_storage_location/export_data/views/management.py index 1b73e0ea3a5..eb3771a8d0d 100644 --- a/admin/rdm_custom_storage_location/export_data/views/management.py +++ b/admin/rdm_custom_storage_location/export_data/views/management.py @@ -473,13 +473,16 @@ def get(self, request, data_id): exported_file_versions = process_data_information(exported_file_info['files']) storage_file_versions = process_data_information(storage_file_info['files']) exported_provider_name = export_data.source_waterbutler_settings.get('storage', {}).get('provider') - if is_add_on_storage(exported_provider_name): + destination_provider_name = restore_data.destination.provider_name + if is_add_on_storage(exported_provider_name) or is_add_on_storage(destination_provider_name): + # If either source or destination is add-on storage then exclude the following keys exclude_keys = ['id', 'path', 'created_at', 'modified_at', 'timestamp_id', # location/ 'location', # metadata/ - 'etag', 'extra', 'modified', 'provider', 'contentType', 'modified_utc', 'created_utc'] + 'metadata'] else: + # If source and destination are bulk-mount storages then exclude the following keys exclude_keys = ['host', 'bucket', 'folder', 'service', 'provider', 'verify_ssl', 'address', 'version', # metadata/ 'etag', 'extra'] diff --git a/admin/rdm_custom_storage_location/utils.py b/admin/rdm_custom_storage_location/utils.py index e54e77442d6..8fac7eda0a5 100644 --- a/admin/rdm_custom_storage_location/utils.py +++ b/admin/rdm_custom_storage_location/utils.py @@ -1146,16 +1146,22 @@ def add_node_settings_to_projects(institution, provider_name): # If storage is bulk-mount then do nothing return - # Get projects that does not have provider's node settings + # Get projects created by institution users institution_users = institution.osfuser_set.all() - kwargs = {f'addons_{provider_name}_node_settings': None} - projects = AbstractNode.objects.filter(type='osf.node', is_deleted=False, creator__in=institution_users, **kwargs) + projects = AbstractNode.objects.filter(type='osf.node', is_deleted=False, creator__in=institution_users) - # Add node settings to above projects + # Add or update node settings to projects for project in projects: + node_settings = getattr(project, f'addons_{provider_name}_node_settings', None) + project_has_no_node_settings = node_settings is None + if provider_name == 'dropboxbusiness': - dropboxbusiness_post_save(None, project, True) + dropboxbusiness_post_save(None, project, created=project_has_no_node_settings) elif provider_name == 'onedrivebusiness': - onedrivebusiness_post_save(None, project, True) + if not project_has_no_node_settings: + # Reset OneDrive Business folder id before update node settings + node_settings.folder_id = None + node_settings.save() + onedrivebusiness_post_save(None, project, created=project_has_no_node_settings) else: - node_post_save(None, project, True) + node_post_save(None, project, created=project_has_no_node_settings) diff --git a/admin_tests/rdm_custom_storage_location/test_utils.py b/admin_tests/rdm_custom_storage_location/test_utils.py index 751afc2af64..7a51fb06b57 100644 --- a/admin_tests/rdm_custom_storage_location/test_utils.py +++ b/admin_tests/rdm_custom_storage_location/test_utils.py @@ -76,6 +76,8 @@ def test_add_node_settings_to_projects_dropboxbusiness(self): def test_add_node_settings_to_projects_onedrivebusiness(self): user = AuthUserFactory() project = ProjectFactory(creator=user) + project.add_addon('onedrivebusiness', None) + project.save() region = RegionFactory(waterbutler_settings=addon_waterbutler_settings) institution = InstitutionFactory.create(_id=region.guid) institution.nodes.set([project]) From f5e3e456477c6aa194106b1f9f2958d1c7b3f905 Mon Sep 17 00:00:00 2001 From: huanphan-tma Date: Fri, 11 Aug 2023 18:44:58 +0700 Subject: [PATCH 05/31] =?UTF-8?q?ref=20[Bug][NII=20Redmine#37211]=E6=A9=9F?= =?UTF-8?q?=E9=96=A2=E3=82=B9=E3=83=88=E3=83=AC=E3=83=BC=E3=82=B8(?= =?UTF-8?q?=E3=82=A2=E3=83=89=E3=82=AA=E3=83=B3=E6=96=B9=E5=BC=8F)?= =?UTF-8?q?=E3=81=AB=E3=81=8A=E3=81=91=E3=82=8B=E3=82=A8=E3=82=AF=E3=82=B9?= =?UTF-8?q?=E3=83=9D=E3=83=BC=E3=83=88=E3=83=BB=E3=83=AA=E3=82=B9=E3=83=88?= =?UTF-8?q?=E3=82=A2=E5=87=A6=E7=90=86=E3=81=AE=E6=AE=8B=E8=AA=B2=E9=A1=8C?= =?UTF-8?q?=E3=81=AB=E3=81=A4=E3=81=84=E3=81=A6:=20Apply=20web=20UI=20fixe?= =?UTF-8?q?s=20for=20add-on=20storages=20to=20be=20able=20to=20run=20corre?= =?UTF-8?q?ctly.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../rdm-institutional-storage-page.js | 1 - api/base/utils.py | 5 +++-- website/templates/project/view_file.mako | 3 +++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/admin/static/js/rdm_custom_storage_location/rdm-institutional-storage-page.js b/admin/static/js/rdm_custom_storage_location/rdm-institutional-storage-page.js index bcd44df2daa..c1e2822babe 100644 --- a/admin/static/js/rdm_custom_storage_location/rdm-institutional-storage-page.js +++ b/admin/static/js/rdm_custom_storage_location/rdm-institutional-storage-page.js @@ -1117,7 +1117,6 @@ function checkStatusExportData(institution_id, source_id, location_id, task_id, var data_res = data.result.list_file_info_export_not_found; var text_show_file = ''; file_name_export_fail = data.result.file_name_export_fail; - need_reload = 0; $('#showFileExportNotExistModal').modal('show'); list_file_info_export_fail = [['project_id', 'project_name', 'owner', 'file_id', 'file_path', 'file_name', 'versions', 'size', 'stamper']]; diff --git a/api/base/utils.py b/api/base/utils.py index cc7c255655f..57a5e89b15f 100644 --- a/api/base/utils.py +++ b/api/base/utils.py @@ -5,7 +5,6 @@ from distutils.version import StrictVersion from hashids import Hashids -from django.utils.http import urlquote from django.core.exceptions import ObjectDoesNotExist from django.db.models import QuerySet, F from rest_framework.exceptions import NotFound @@ -207,7 +206,9 @@ def waterbutler_api_url_for(node_id, provider, path='/', _internal=False, base_u base_url = None url = furl.furl(website_settings.WATERBUTLER_INTERNAL_URL if _internal else (base_url or website_settings.WATERBUTLER_URL)) segments = ['v1', 'resources', node_id, 'providers', provider] + path.split('/')[1:] - url.path.segments.extend([urlquote(x) for x in segments]) + # furl auto encoding, so urlquote is not necessary as `[urlquote(x) for x in segments]` + # try to convert segment items to string for furl check + url.path.segments.extend([str(x) for x in segments]) url.args.update(kwargs) return url.url diff --git a/website/templates/project/view_file.mako b/website/templates/project/view_file.mako index f9372a34155..f8c92d7d3ed 100644 --- a/website/templates/project/view_file.mako +++ b/website/templates/project/view_file.mako @@ -238,6 +238,9 @@ }, }); window.contextVars.file.urls.external = window.contextVars.file.extra.webView; + if (window.contextVars.file.path && !window.contextVars.file.path.startsWith('/')) { + window.contextVars.file.path = '/' + window.contextVars.file.path; + } From 79813277194ab9156492d6a4644a41354111aa0b Mon Sep 17 00:00:00 2001 From: huanphan-tma Date: Mon, 14 Aug 2023 18:22:21 +0700 Subject: [PATCH 06/31] =?UTF-8?q?ref=20[Bug][NII=20Redmine#37211]=E6=A9=9F?= =?UTF-8?q?=E9=96=A2=E3=82=B9=E3=83=88=E3=83=AC=E3=83=BC=E3=82=B8(?= =?UTF-8?q?=E3=82=A2=E3=83=89=E3=82=AA=E3=83=B3=E6=96=B9=E5=BC=8F)?= =?UTF-8?q?=E3=81=AB=E3=81=8A=E3=81=91=E3=82=8B=E3=82=A8=E3=82=AF=E3=82=B9?= =?UTF-8?q?=E3=83=9D=E3=83=BC=E3=83=88=E3=83=BB=E3=83=AA=E3=82=B9=E3=83=88?= =?UTF-8?q?=E3=82=A2=E5=87=A6=E7=90=86=E3=81=AE=E6=AE=8B=E8=AA=B2=E9=A1=8C?= =?UTF-8?q?=E3=81=AB=E3=81=A4=E3=81=84=E3=81=A6:=20Fix=20IT=20bug=20-=20ad?= =?UTF-8?q?d=20new=20exclude=20keys=20in=20check=20restore=20data=20functi?= =?UTF-8?q?on.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../export_data/views/management.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/admin/rdm_custom_storage_location/export_data/views/management.py b/admin/rdm_custom_storage_location/export_data/views/management.py index eb3771a8d0d..443a24cbf1c 100644 --- a/admin/rdm_custom_storage_location/export_data/views/management.py +++ b/admin/rdm_custom_storage_location/export_data/views/management.py @@ -476,11 +476,13 @@ def get(self, request, data_id): destination_provider_name = restore_data.destination.provider_name if is_add_on_storage(exported_provider_name) or is_add_on_storage(destination_provider_name): # If either source or destination is add-on storage then exclude the following keys - exclude_keys = ['id', 'path', 'created_at', 'modified_at', 'timestamp_id', + exclude_keys = ['id', 'provider', 'path', 'created_at', 'modified_at', 'timestamp_id', # location/ 'location', # metadata/ - 'metadata'] + 'metadata', + # timestamp/ + 'timestamp_id', 'verify_user'] else: # If source and destination are bulk-mount storages then exclude the following keys exclude_keys = ['host', 'bucket', 'folder', 'service', 'provider', 'verify_ssl', 'address', 'version', From f95645d42fa86dfdd2ac6137272ecacf1473645d Mon Sep 17 00:00:00 2001 From: huanphan-tma Date: Fri, 18 Aug 2023 14:36:13 +0700 Subject: [PATCH 07/31] =?UTF-8?q?ref=20[Bug][NII=20Redmine#37211]=E6=A9=9F?= =?UTF-8?q?=E9=96=A2=E3=82=B9=E3=83=88=E3=83=AC=E3=83=BC=E3=82=B8(?= =?UTF-8?q?=E3=82=A2=E3=83=89=E3=82=AA=E3=83=B3=E6=96=B9=E5=BC=8F)?= =?UTF-8?q?=E3=81=AB=E3=81=8A=E3=81=91=E3=82=8B=E3=82=A8=E3=82=AF=E3=82=B9?= =?UTF-8?q?=E3=83=9D=E3=83=BC=E3=83=88=E3=83=BB=E3=83=AA=E3=82=B9=E3=83=88?= =?UTF-8?q?=E3=82=A2=E5=87=A6=E7=90=86=E3=81=AE=E6=AE=8B=E8=AA=B2=E9=A1=8C?= =?UTF-8?q?=E3=81=AB=E3=81=A4=E3=81=84=E3=81=A6:=20Fix=20IT=20bug=20-=20co?= =?UTF-8?q?rrect=20path=20value=20of=20file=20node=20for=20OneDrive=20Busi?= =?UTF-8?q?ness=20provider.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../export_data/views/restore.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/admin/rdm_custom_storage_location/export_data/views/restore.py b/admin/rdm_custom_storage_location/export_data/views/restore.py index 76cd433a518..53a9c785e09 100644 --- a/admin/rdm_custom_storage_location/export_data/views/restore.py +++ b/admin/rdm_custom_storage_location/export_data/views/restore.py @@ -646,15 +646,19 @@ def copy_files_from_export_data_to_destination(task, current_process_step, expor list_file_restore_fail.append(file) continue + response_body_data = response_body.get('data', {}) + if is_destination_addon_storage: + # Fix for OneDrive Business because its path is different from other add-on storages + response_file_path = response_body_data.get('attributes', {}).get('path', file_materialized_path) # Create file node if not have for add-on storage - utils.prepare_file_node_for_add_on_storage(file_project_id, destination_provider, file_materialized_path, **kwargs) + utils.prepare_file_node_for_add_on_storage(file_project_id, destination_provider, response_file_path, **kwargs) # update file metadata utils.update_file_metadata(file_project_id, file_provider, destination_provider, file_materialized_path) - response_id = response_body.get('data', {}).get('id') - response_file_version_id = response_body.get('data', {}).get('attributes', {}).get('extra', {}).get('version', version_id) + response_id = response_body_data.get('id') + response_file_version_id = response_body_data.get('attributes', {}).get('extra', {}).get('version', version_id) if response_id.startswith('osfstorage'): # If id is osfstorage/[_id] then get _id file_path_splits = response_id.split('/') From eedc24cae6926a6f8d8768e591619839a4895a83 Mon Sep 17 00:00:00 2001 From: huanphan-tma Date: Wed, 16 Aug 2023 18:28:17 +0700 Subject: [PATCH 08/31] =?UTF-8?q?ref=20[Bug][NII=20Redmine#37211]=E6=A9=9F?= =?UTF-8?q?=E9=96=A2=E3=82=B9=E3=83=88=E3=83=AC=E3=83=BC=E3=82=B8(?= =?UTF-8?q?=E3=82=A2=E3=83=89=E3=82=AA=E3=83=B3=E6=96=B9=E5=BC=8F)?= =?UTF-8?q?=E3=81=AB=E3=81=8A=E3=81=91=E3=82=8B=E3=82=A8=E3=82=AF=E3=82=B9?= =?UTF-8?q?=E3=83=9D=E3=83=BC=E3=83=88=E3=83=BB=E3=83=AA=E3=82=B9=E3=83=88?= =?UTF-8?q?=E3=82=A2=E5=87=A6=E7=90=86=E3=81=AE=E6=AE=8B=E8=AA=B2=E9=A1=8C?= =?UTF-8?q?=E3=81=AB=E3=81=A4=E3=81=84=E3=81=A6:=20Remove=20backup,=20roll?= =?UTF-8?q?back=20functions=20and=20update=20source=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../export_data/utils.py | 367 ------- .../export_data/views/restore.py | 242 +---- admin/rdm_custom_storage_location/tasks.py | 6 - .../rdm-institutional-storage-page.js | 56 +- .../export_data_information.html | 25 - .../export_data/test_utils.py | 949 ------------------ .../export_data/views/test_restore.py | 534 +--------- .../rdm_custom_storage_location/test_tasks.py | 22 - 8 files changed, 54 insertions(+), 2147 deletions(-) diff --git a/admin/rdm_custom_storage_location/export_data/utils.py b/admin/rdm_custom_storage_location/export_data/utils.py index e5537eced17..a144bbc1fd1 100644 --- a/admin/rdm_custom_storage_location/export_data/utils.py +++ b/admin/rdm_custom_storage_location/export_data/utils.py @@ -2,7 +2,6 @@ import inspect # noqa import json # noqa import logging # noqa -import re from copy import deepcopy import jsonschema @@ -716,360 +715,6 @@ def prepare_file_node_for_add_on_storage(node_id, provider, file_path, **kwargs) # signals.file_updated.send(target=node, user=user, event_type=NodeLog.FILE_COPIED, payload=payload) -def move_file(node_id, provider, source_file_path, destination_file_path, cookies, callback_log=False, - base_url=WATERBUTLER_URL, is_addon_storage=True, **kwargs): - move_old_data_url = waterbutler_api_url_for( - node_id, provider, path=source_file_path, _internal=base_url == WATERBUTLER_URL, - base_url=base_url, callback_log=callback_log, **kwargs) - if is_addon_storage: - # Add on storage: move whole source path to root and rename to destination path - destination_file_path = destination_file_path[1:] if destination_file_path.startswith('/') \ - else destination_file_path - request_body = { - 'action': 'move', - 'path': '/', - 'rename': destination_file_path, - } - else: - # Bulk mount storage: move source folder to destination folder - request_body = { - 'action': 'move', - 'path': destination_file_path, - } - return requests.post(move_old_data_url, - headers={'content-type': 'application/json'}, - cookies=cookies, - json=request_body) - - -def move_addon_folder_to_backup( - node_id, provider, process_start, cookies, callback_log=False, - base_url=WATERBUTLER_URL, check_abort_task=None, **kwargs): - path_list, root_child_folders = get_all_file_paths_in_addon_storage( - node_id, provider, '/', cookies, base_url, exclude_path_regex=ANY_BACKUP_FOLDER_REGEX, **kwargs) - if len(path_list) == 0: - return {} - - # Move file - has_error = False - error_message = '' - for path in path_list: - if callable(check_abort_task): - check_abort_task() - try: - paths = path.split('/') - paths.insert(1, f'backup_{process_start}') - new_path = '/'.join(paths) - if provider == 'nextcloudinstitutions' and len(paths) > 2: - # Nextcloud for Institutions: try to create new parent folders before moving files - result = create_parent_folders_for_nextcloud_for_institutions(node_id, provider, paths, cookies=cookies, - callback_log=callback_log, base_url=base_url, **kwargs) - if result is not None: - return result - - response = move_file(node_id, provider, path, new_path, cookies, - callback_log, base_url, is_addon_storage=True, **kwargs) - if response.status_code != 200 and response.status_code != 201 and response.status_code != 202: - logger.error(f'Response return error: {response.content}') - has_error = True - error_message = f'{response.status_code} - {response.content}' - break - except Exception as e: - if callable(check_abort_task): - check_abort_task() - logger.error(f'Exception: {e}') - has_error = True - error_message = repr(e) - break - - if has_error: - return {'error': error_message} - - # S3: Clean root folders after moving - delete_paths(node_id, provider, root_child_folders, - cookies, callback_log, base_url, **kwargs) - return {} - - -def move_addon_folder_from_backup(node_id, provider, process_start, cookies, callback_log=False, base_url=WATERBUTLER_URL, **kwargs): - path_list, root_child_folders = get_all_file_paths_in_addon_storage( - node_id, provider, '/', cookies, base_url, include_path_regex=f'^\\/backup_{process_start}\\/.*$', **kwargs) - if len(path_list) == 0: - return {} - - # Move files and folders from backup to root - has_error = False - error_message = '' - for path in path_list: - try: - paths = path.split('/') - if paths[1] == f'backup_{process_start}': - del paths[1] - else: - continue - new_path = '/'.join(paths) - if provider == 'nextcloudinstitutions' and len(paths) > 2: - # Nextcloud for Institutions: try to create new parent folders before moving files - result = create_parent_folders_for_nextcloud_for_institutions(node_id, provider, paths, cookies=cookies, - callback_log=callback_log, base_url=base_url, **kwargs) - if result is not None: - return result - - response = move_file(node_id, provider, path, new_path, - cookies, callback_log, base_url, is_addon_storage=True, **kwargs) - if response.status_code != 200 and response.status_code != 201 and response.status_code != 202: - logger.error(f'Response return error: {response.content}') - has_error = True - error_message = f'{response.status_code} - {response.content}' - break - except Exception as e: - logger.error(f'Exception: {e}') - has_error = True - error_message = repr(e) - break - - if has_error: - return {'error': error_message} - - # S3: Clean backup folders after moving - delete_paths(node_id, provider, root_child_folders, - cookies, callback_log, base_url, **kwargs) - return {} - - -def get_all_file_paths_in_addon_storage(node_id, provider, file_path, cookies, base_url=WATERBUTLER_URL, - include_path_regex='', exclude_path_regex='', **kwargs): - try: - response = get_file_data(node_id, provider, file_path, cookies, base_url=base_url, get_file_info=True, **kwargs) - if response.status_code != 200: - return [], [] - response_body = response.json() - data = response_body.get('data') - if len(data) != 0: - list_file_path = [] - root_child_folders = [] - for item in data: - path = item.get('attributes', {}).get('path') - materialized_path = item.get('attributes', {}).get('materialized') - kind = item.get('attributes', {}).get('kind') - - try: - if isinstance(include_path_regex, str) and len(include_path_regex) != 0: - pattern = re.compile(include_path_regex) - if not pattern.match(materialized_path): - continue - if isinstance(exclude_path_regex, str) and len(exclude_path_regex) != 0: - pattern = re.compile(exclude_path_regex) - if pattern.match(materialized_path): - continue - except Exception as e: - logger.error(f'Exception: {e}') - continue - - if kind == 'file': - list_file_path.append(path) - elif kind == 'folder': - if file_path == '/': - # S3: Add to list need to delete - root_child_folders.append(path) - # Call this function again - sub_file_paths, _ = get_all_file_paths_in_addon_storage(node_id, provider, path, cookies, base_url, **kwargs) - list_file_path.extend(sub_file_paths) - - return list_file_path, root_child_folders - else: - return [], [] - except Exception: - return [], [] - - -def move_bulk_mount_folder_to_backup( - node_id, provider, process_start, cookies, callback_log=False, - base_url=WATERBUTLER_URL, check_abort_task=None, **kwargs): - path_list, _ = get_all_child_paths_in_bulk_mount_storage( - node_id, provider, '/', cookies, base_url, exclude_path_regex=ANY_BACKUP_FOLDER_REGEX, **kwargs) - if len(path_list) == 0: - return {} - - # Move file - has_error = False - error_message = '' - new_materialized_path = f'/backup_{process_start}/' - - # OSF storage: create new backup folder - try: - if callable(check_abort_task): - check_abort_task() - response_body, status_code = create_folder(node_id, provider, '/', new_materialized_path[1:], - cookies, callback_log, base_url, **kwargs) - if status_code != 201: - return {'error': 'Cannot create backup folder'} - new_path = response_body['data']['attributes']['path'] - except Exception as e: - logger.error(f'Exception: {e}') - return {'error': repr(e)} - - # Move all root child files and folders to backup folder - for path, materialized_path in path_list: - if callable(check_abort_task): - check_abort_task() - try: - response = move_file(node_id, provider, path, new_path, - cookies, callback_log, base_url, is_addon_storage=False, **kwargs) - if response.status_code != 200 and response.status_code != 201 and response.status_code != 202: - logger.error(f'Response return error: {response.content}') - # Rollback - has_error = True - error_message = f'{response.status_code} - {response.content}' - break - except Exception as e: - logger.error(f'Exception: {e}') - has_error = True - error_message = repr(e) - break - - if has_error: - return {'error': error_message} - return {} - - -def move_bulk_mount_folder_from_backup(node_id, provider, process_start, cookies, callback_log=False, base_url=WATERBUTLER_URL, **kwargs): - path_list, backup_path = get_all_child_paths_in_bulk_mount_storage( - node_id, provider, f'/backup_{process_start}/', - cookies, base_url, get_path_from=f'/backup_{process_start}/', **kwargs) - if len(path_list) == 0: - return {} - - # Move files and folders from backup to root - has_error = False - error_message = '' - root_path = '/' - for path, materialized_path in path_list: - try: - response = move_file(node_id, provider, path, root_path, - cookies, callback_log, base_url, is_addon_storage=False, **kwargs) - if response.status_code != 200 and response.status_code != 201 and response.status_code != 202: - logger.error(f'Response return error: {response.content}') - has_error = True - error_message = f'{response.status_code} - {response.content}' - break - except Exception as e: - logger.error(f'Exception: {e}') - has_error = True - error_message = repr(e) - break - - if has_error: - return {'error': error_message} - - # OSF storage: Delete backup folder after moving - delete_paths(node_id, provider, [backup_path], cookies, callback_log, base_url, **kwargs) - return {} - - -def get_all_child_paths_in_bulk_mount_storage( - node_id, provider, file_materialized_path, cookies, - base_url=WATERBUTLER_URL, exclude_path_regex='', get_path_from='', **kwargs): - list_file_path = [] - path_from_args = None - try: - if not file_materialized_path.startswith('/') or not file_materialized_path.endswith('/'): - return list_file_path, path_from_args - paths = file_materialized_path.split('/')[1:] - if len(paths) > 0: - current_path = '/' - current_materialized_path = '/' - for index, path in enumerate(paths): - response = get_file_data(node_id, provider, current_path, cookies, base_url=base_url, get_file_info=True, **kwargs) - if response.status_code != 200: - return [], None - response_body = response.json() - data = response_body.get('data', []) - if index == len(paths) - 1: - for item in data: - path = item.get('attributes', {}).get('path') - materialized_path = item.get('attributes', {}).get('materialized') - try: - if isinstance(exclude_path_regex, str) and len(exclude_path_regex) != 0: - pattern = re.compile(exclude_path_regex) - if pattern.match(materialized_path): - continue - except Exception as e: - logger.error(f'Exception: {e}') - continue - list_file_path.append((path, materialized_path)) - else: - current_materialized_path = f'{current_materialized_path}{path}/' - current_path_info = next((item for item in data if item.get('attributes', {}).get('materialized') == - current_materialized_path), None) - if current_path_info is None: - break - - current_path = current_path_info['attributes']['path'] - if current_path_info['attributes']['materialized'] == get_path_from: - path_from_args = current_path - return list_file_path, path_from_args - except Exception: - return list_file_path, path_from_args - - -def delete_paths(node_id, provider, paths, - cookies, callback_log=False, base_url=WATERBUTLER_URL, **kwargs): - for path in paths: - try: - delete_file(node_id, provider, path, - cookies, callback_log, base_url, **kwargs) - except Exception as e: - logger.error(f'Exception: {e}') - - -def delete_file(node_id, provider, file_path, cookies, callback_log=False, base_url=WATERBUTLER_URL, **kwargs): - destination_storage_backup_meta_api = waterbutler_api_url_for( - node_id, provider, path=file_path, - _internal=base_url == WATERBUTLER_URL, base_url=base_url, - callback_log=callback_log, **kwargs) - return requests.delete(destination_storage_backup_meta_api, - headers={'content-type': 'application/json'}, - cookies=cookies) - - -def delete_all_files_except_backup(node_id, provider, cookies, callback_log=False, base_url=WATERBUTLER_URL, **kwargs): - # In add-on institutional storage: Delete files, except the backup folder. - list_not_backup_paths = [] - try: - response = get_file_data(node_id, provider, '/', cookies, base_url=base_url, get_file_info=True, **kwargs) - if response.status_code != 200: - raise Exception(f'Cannot get file info list.') - response_body = response.json() - data = response_body.get('data') - if len(data) != 0: - for item in data: - path = item.get('attributes', {}).get('path') - materialized_path = item.get('attributes', {}).get('materialized') - kind = item.get('attributes', {}).get('kind') - - try: - pattern = re.compile(ANY_BACKUP_FOLDER_REGEX) - if pattern.match(materialized_path): - continue - except Exception as e: - logger.error(f'Exception: {e}') - - if kind == 'file' or kind == 'folder': - list_not_backup_paths.append(path) - except (requests.ConnectionError, requests.Timeout) as e: - logger.error(f'Connection error: {e}') - raise e - - # Delete all paths - for path in list_not_backup_paths: - try: - delete_file(node_id, provider, path, cookies, callback_log, base_url, **kwargs) - except (requests.ConnectionError, requests.Timeout) as e: - logger.error(f'Connection error: {e}') - raise e - - def is_add_on_storage(provider): if not provider: return None @@ -1086,18 +731,6 @@ def is_add_on_storage(provider): return None -def create_parent_folders_for_nextcloud_for_institutions(node_id, provider, paths, **kwargs): - """ Nextcloud for Institutions: create folders before moving files """ - parent_path = '/' - for path in paths[1:len(paths) - 1]: - folder_path = f'{path}/' - _, status_code = create_folder(node_id, provider, parent_path, folder_path, **kwargs) - if status_code not in [201, 409]: - return {'error': 'Cannot create folder for Nextcloud for Institutions'} - parent_path += folder_path - return None - - def update_file_metadata(project_guid, source_provider, destination_provider, file_path): """ Update restored file path of addons_metadata_filemetadata """ project = AbstractNode.load(project_guid) diff --git a/admin/rdm_custom_storage_location/export_data/views/restore.py b/admin/rdm_custom_storage_location/export_data/views/restore.py index 5021c80e885..dbfefd395aa 100644 --- a/admin/rdm_custom_storage_location/export_data/views/restore.py +++ b/admin/rdm_custom_storage_location/export_data/views/restore.py @@ -5,7 +5,6 @@ import inspect # noqa import json import logging -from functools import partial from celery.states import PENDING from celery.contrib.abortable import AbortableAsyncResult, ABORTED @@ -53,25 +52,21 @@ def post(self, request, **kwargs): kwargs.setdefault('cookie', cookie) cookies = request.COOKIES creator = request.user - is_from_confirm_dialog = request.POST.get('is_from_confirm_dialog', default=False) if destination_id is None or export_id is None: return Response({'message': f'Missing required parameters.'}, status=status.HTTP_400_BAD_REQUEST) - if not is_from_confirm_dialog: - # Check the destination is available (not in restore process or checking restore data process) - any_process_running = utils.check_for_any_running_restore_process(destination_id) - if any_process_running: - return Response({'message': f'Cannot restore in this time.'}, status=status.HTTP_400_BAD_REQUEST) - - result = check_before_restore_export_data(cookies, export_id, destination_id, cookie=cookie) - if result.get('open_dialog'): - # If open_dialog is True, return HTTP 200 with empty response - return Response({}, status=status.HTTP_200_OK) - elif 'not_found' in result: - return Response({'message': result.get('message')}, status=status.HTTP_404_NOT_FOUND) - elif result.get('message'): - # If there is error message, return HTTP 400 - return Response({'message': result.get('message')}, status=status.HTTP_400_BAD_REQUEST) + # Check the destination is available (not in restore process or checking restore data process) + any_process_running = utils.check_for_any_running_restore_process(destination_id) + if any_process_running: + return Response({'message': f'Cannot restore in this time.'}, status=status.HTTP_400_BAD_REQUEST) + + result = check_before_restore_export_data(cookies, export_id, destination_id, cookie=cookie) + response_body = {'message': result.get('message')} + if 'not_found' in result: + return Response(response_body, status=status.HTTP_404_NOT_FOUND) + elif result.get('message'): + # If there is error message, return HTTP 400 + return Response(response_body, status=status.HTTP_400_BAD_REQUEST) # Start restore data task and return task id export_data = ExportData.objects.filter(id=export_id).first() @@ -88,7 +83,7 @@ def check_before_restore_export_data(cookies, export_id, destination_id, **kwarg check_export_data = ExportData.objects.filter(id=export_id, is_deleted=False) # Check export file data: /export_{process_start}/export_data_{institution_guid}_{process_start}.json if not check_export_data: - return {'open_dialog': False, 'message': f'Cannot be restored because export data does not exist', 'not_found': True} + return {'message': f'Cannot be restored because export data does not exist', 'not_found': True} # Update status RUNNING for export data for checking connect to destination storage export_data = check_export_data[0] pre_status = export_data.status @@ -100,13 +95,13 @@ def check_before_restore_export_data(cookies, export_id, destination_id, **kwarg # Update status COMPLETED for export data if raise error export_data.status = pre_status export_data.save() - return {'open_dialog': False, 'message': f'The export data files are corrupted'} + return {'message': f'The export data files are corrupted'} except Exception as e: logger.error(f'Exception: {e}') # Update status COMPLETED for export data if raise exception when reading export data and checking schema export_data.status = pre_status export_data.save() - return {'open_dialog': False, 'message': f'Cannot connect to the export data storage location'} + return {'message': f'Cannot connect to the export data storage location'} # Get file info file: /export_{process_start}/file_info_{institution_guid}_{process_start}.json try: @@ -116,52 +111,24 @@ def check_before_restore_export_data(cookies, export_id, destination_id, **kwarg logger.error(f'Exception: {e}') export_data.status = pre_status export_data.save() - return {'open_dialog': False, 'message': str(e)} + return {'message': str(e)} if not len(export_data_folders): export_data.status = pre_status export_data.save() - return {'open_dialog': False, 'message': f'The export data files are corrupted'} + return {'message': f'The export data files are corrupted'} - # Check whether the restore destination storage is not empty + # Check whether the restore destination storage exists destination_region = Region.objects.filter(id=destination_id).first() if not destination_region: export_data.status = pre_status export_data.save() - return {'open_dialog': False, 'message': f'Failed to get destination storage information'} - - destination_provider = destination_region.provider_name - if utils.is_add_on_storage(destination_provider): - try: - project_ids = {item.get('project', {}).get('id') for item in export_data_folders} - for project_id in project_ids: - destination_base_url = destination_region.waterbutler_url - response = utils.get_file_data(project_id, destination_provider, '/', cookies, - destination_base_url, get_file_info=True, **kwargs) - if response.status_code != status.HTTP_200_OK: - # Error - logger.error(f'Return error with response: {response.content}') - export_data.status = pre_status - export_data.save() - return {'open_dialog': False, 'message': f'Cannot connect to destination storage'} - - response_body = response.json() - data = response_body.get('data') - if len(data) != 0: - # Destination storage is not empty, show confirm dialog - export_data.status = pre_status - export_data.save() - return {'open_dialog': True} - except Exception as e: - logger.error(f'Exception: {e}') - export_data.status = pre_status - export_data.save() - return {'open_dialog': False, 'message': f'Cannot connect to destination storage'} + return {'message': f'Failed to get destination storage information'} export_data.status = pre_status export_data.save() - # Destination storage is empty, return False - return {'open_dialog': False} + # Return empty dict + return {} def prepare_for_restore_export_data_process(cookies, export_id, destination_id, list_project_id, creator, **kwargs): @@ -205,11 +172,6 @@ def restore_export_data_process(task, cookies, export_id, export_data_restore_id destination_region = export_data_restore.destination destination_provider = destination_region.provider_name - if utils.is_add_on_storage(destination_provider): - # Move all existing files/folders in destination to backup_{process_start} folder - project_ids = {item.get('project', {}).get('id') for item in export_data_folders} - for project_id in project_ids: - move_all_files_to_backup_folder(task, current_process_step, project_id, export_data_restore, cookies, **kwargs) check_if_restore_process_stopped(task, current_process_step) current_process_step = 2 @@ -251,8 +213,6 @@ def restore_export_data_process(task, cookies, export_id, export_data_restore_id if task.is_aborted(): task.update_state(state=ABORTED, meta={'current_restore_step': current_process_step}) - else: - restore_export_data_rollback_process(task, cookies, export_id, export_data_restore_id, process_step=current_process_step, **kwargs) raise e @@ -286,7 +246,6 @@ def post(self, request, *args, **kwargs): export_id = self.kwargs.get('export_id') cookie = request.user.get_or_create_cookie().decode() kwargs.setdefault('cookie', cookie) - cookies = request.COOKIES if not destination_id or not export_id or not task_id: return Response({'message': f'Missing required parameters.'}, status=status.HTTP_400_BAD_REQUEST) @@ -334,61 +293,7 @@ def post(self, request, *args, **kwargs): export_data_restore.update(process_end=timezone.make_naive(timezone.now(), timezone.utc), status=ExportData.STATUS_STOPPED) - # Start rollback restore export data process - process = tasks.run_restore_export_data_rollback_process.delay( - cookies, - export_id, - export_data_restore.pk, - current_progress_step, - cookie=cookie, - ) - return Response({'task_id': process.task_id}, status=status.HTTP_200_OK) - - -def restore_export_data_rollback_process(task, cookies, export_id, export_data_restore_id, process_step, **kwargs): - export_data_restore = ExportDataRestore.objects.get(pk=export_data_restore_id) - export_data_restore.update(task_id=task.request.id) - - destination_provider = export_data_restore.destination.provider_name - if process_step == 0 or not utils.is_add_on_storage(destination_provider): - # If storage is bulk-mount method or the restore process has not changed anything related to files, then do nothing - export_data_restore.update(process_end=timezone.make_naive(timezone.now(), timezone.utc), - status=ExportData.STATUS_STOPPED) - return {'message': 'Stop restore data successfully.'} - - try: - with transaction.atomic(): - export_data = ExportData.objects.filter(id=export_id, is_deleted=False)[0] - # File info file: /export_{process_start}/file_info_{institution_guid}_{process_start}.json - file_info_json = read_file_info_and_check_schema(export_data, cookies, **kwargs) - if file_info_json is None: - raise ProcessError(f'Cannot get file information list') - file_info_files = file_info_json.get('files', []) - - if len(file_info_files) == 0: - export_data_restore.update(process_end=timezone.make_naive(timezone.now(), timezone.utc), - status=ExportData.STATUS_STOPPED) - return {'message': 'Stop restore data successfully.'} - destination_first_project_id = file_info_files[0].get('project', {}).get('id') - - location_id = export_data.location.id - # Delete files, except the backup folder. - if process_step == 2 or process_step == 3: - delete_all_files_except_backup_folder( - export_data_restore, location_id, destination_first_project_id, - cookies, **kwargs) - - # Move all files from the backup folder out and delete backup folder - if 0 < process_step < 4: - move_all_files_from_backup_folder_to_root(export_data_restore, destination_first_project_id, cookies, **kwargs) - - export_data_restore.update(process_end=timezone.make_naive(timezone.now(), timezone.utc), - status=ExportData.STATUS_STOPPED) - except Exception as e: - export_data_restore.update(status=ExportData.STATUS_ERROR) - raise e - - return {'message': 'Stop restore data successfully.'} + return Response({'message': 'Stop restore data successfully.'}, status=status.HTTP_200_OK) class CheckTaskStatusRestoreDataActionView(RdmPermissionMixin, APIView): @@ -531,44 +436,6 @@ def recalculate_user_quota(destination_region): update_user_used_quota(user, storage_type=UserQuota.CUSTOM_STORAGE) -def move_all_files_to_backup_folder(task, current_process_step, destination_first_project_id, export_data_restore, cookies, **kwargs): - try: - destination_region = export_data_restore.destination - destination_provider = destination_region.provider_name - destination_base_url = destination_region.waterbutler_url - is_destination_addon_storage = utils.is_add_on_storage(destination_provider) - destination_provider = destination_provider if is_destination_addon_storage else INSTITUTIONAL_STORAGE_PROVIDER_NAME - with transaction.atomic(): - # Preload params to function - check_task_aborted_function = partial( - check_if_restore_process_stopped, - task=task, - current_process_step=current_process_step) - - # move all old data in restore destination storage to a folder to back up folder - if is_destination_addon_storage and destination_provider != 'onedrivebusiness': - move_folder_to_backup = partial(utils.move_addon_folder_to_backup) - else: - move_folder_to_backup = partial(utils.move_bulk_mount_folder_to_backup) - response = move_folder_to_backup( - destination_first_project_id, - destination_provider, - process_start=export_data_restore.process_start_timestamp, - cookies=cookies, - callback_log=False, - base_url=destination_base_url, - check_abort_task=check_task_aborted_function, **kwargs) - check_if_restore_process_stopped(task, current_process_step) - if response and 'error' in response: - # Error - error_msg = response.get('error') - logger.error(f'Move all files to backup folder error message: {error_msg}') - raise ProcessError(f'Failed to move files to backup folder.') - except Exception as e: - logger.error(f'Move all files to backup folder exception: {e}') - raise ProcessError(f'Failed to move files to backup folder.') - - def create_folder_in_destination(task, current_process_step, export_data_folders, export_data_restore, cookies, **kwargs): destination_region = export_data_restore.destination @@ -765,68 +632,3 @@ def add_tag_and_timestamp_to_database(task, current_process_step, list_created_f # Add timestamp to DB add_timestamp_to_file_node(node, project_id, file_timestamp) check_if_restore_process_stopped(task, current_process_step) - - -def delete_all_files_except_backup_folder(export_data_restore, location_id, destination_first_project_id, cookies, **kwargs): - destination_region = export_data_restore.destination - destination_base_url = destination_region.waterbutler_url - destination_provider = destination_region.provider_name if utils.is_add_on_storage( - destination_region.provider_name) else INSTITUTIONAL_STORAGE_PROVIDER_NAME - - try: - utils.delete_all_files_except_backup( - destination_first_project_id, destination_provider, - cookies, location_id, destination_base_url, **kwargs) - except Exception as e: - logger.error(f'Delete all files exception: {e}') - raise ProcessError(f'Cannot delete files except backup folders') - - -def move_all_files_from_backup_folder_to_root(export_data_restore, destination_first_project_id, cookies, **kwargs): - destination_region = export_data_restore.destination - destination_provider = destination_region.provider_name - destination_base_url = destination_region.waterbutler_url - is_destination_addon_storage = utils.is_add_on_storage(destination_provider) - destination_provider = destination_provider if is_destination_addon_storage else INSTITUTIONAL_STORAGE_PROVIDER_NAME - - try: - if is_destination_addon_storage and destination_provider != 'onedrivebusiness': - move_folder_from_backup = partial(utils.move_addon_folder_from_backup) - else: - move_folder_from_backup = partial(utils.move_bulk_mount_folder_from_backup) - response = move_folder_from_backup( - destination_first_project_id, - destination_provider, - process_start=export_data_restore.process_start_timestamp, - cookies=cookies, - callback_log=False, - base_url=destination_base_url, - **kwargs) - if 'error' in response: - # Error - error_msg = response.get('error') - logger.error(f'Move all files from back up error message: {error_msg}') - raise ProcessError(f'Failed to move backup folder to root') - except Exception as e: - logger.error(f'Move all files from back up exception: {e}') - raise ProcessError(f'Failed to move backup folder to root') - - -class CheckRunningRestoreActionView(RdmPermissionMixin, APIView): - raise_exception = True - authentication_classes = ( - drf_authentication.SessionAuthentication, - ) - - def get(self, request, **kwargs): - destination_id = request.GET.get('destination_id') - running_restore = ExportDataRestore.objects.filter(destination_id=destination_id).exclude( - Q(status=ExportData.STATUS_STOPPED) | Q(status=ExportData.STATUS_COMPLETED) | Q( - status=ExportData.STATUS_ERROR)) - task_id = None - if len(running_restore) != 0: - task_id = running_restore[0].task_id - response = { - 'task_id': task_id - } - return Response(response, status=status.HTTP_200_OK) diff --git a/admin/rdm_custom_storage_location/tasks.py b/admin/rdm_custom_storage_location/tasks.py index 42c61fe91e4..1db197dc038 100644 --- a/admin/rdm_custom_storage_location/tasks.py +++ b/admin/rdm_custom_storage_location/tasks.py @@ -11,7 +11,6 @@ 'run_export_data_process', 'run_export_data_rollback_process', 'run_restore_export_data_process', - 'run_restore_export_data_rollback_process', ] @@ -28,8 +27,3 @@ def run_export_data_rollback_process(self, cookies, export_data_id, **kwargs): @celery_app.task(bind=True, base=AbortableTask, track_started=True) def run_restore_export_data_process(self, cookies, export_id, export_data_restore_id, list_project_id, **kwargs): return restore.restore_export_data_process(self, cookies, export_id, export_data_restore_id, list_project_id, **kwargs) - - -@celery_app.task(bind=True, base=AbortableTask, track_started=True) -def run_restore_export_data_rollback_process(self, cookies, export_id, export_data_restore_id, process_step, **kwargs): - return restore.restore_export_data_rollback_process(self, cookies, export_id, export_data_restore_id, process_step, **kwargs) diff --git a/admin/static/js/rdm_custom_storage_location/rdm-institutional-storage-page.js b/admin/static/js/rdm_custom_storage_location/rdm-institutional-storage-page.js index b2413c41033..d33455898ad 100644 --- a/admin/static/js/rdm_custom_storage_location/rdm-institutional-storage-page.js +++ b/admin/static/js/rdm_custom_storage_location/rdm-institutional-storage-page.js @@ -1621,10 +1621,8 @@ $('#stop_restore_button').on('click', function () { return; } stop_restore_task_id = response['task_id']; - $osf.growl(_('Stop Restore Export Data'), 'Stop restoring in background.', 'success', growlBoxDelay); - setTimeout(function () { - checkTaskStatus(stop_restore_task_id, 'Stop Restore'); - }, intervalCheckStatus); + enableRestoreFunction(); + $osf.growl(_('Stop Restore Export Data'), _('Stopped restoring data process.'), 'success', 0); }).fail(function (jqXHR) { enableStopRestoreFunction(); var data = jqXHR.responseJSON; @@ -1679,10 +1677,6 @@ function checkTaskStatus(task_id, task_type) { $('.table-ng-file-restore-not-exist').html(text_show_file); $('.table-ng-file-restore-not-exist').css('word-break', 'break-word'); } - } else if (result_task_type === 'Stop Restore') { - // Done stopping restore export data - enableRestoreFunction(); - $osf.growl(_('Stop Restore Export Data'), _('Stopped restoring data process.'), 'success', 0); } } else if (state === 'PENDING' || state === 'STARTED') { // Redo check task status after 2 seconds @@ -1701,12 +1695,7 @@ function checkTaskStatus(task_id, task_type) { $osf.growl(title, _('Stopped restoring data process.'), 'danger', 0); } if (result && result['message']) { - var title = ''; - if (result_task_type === 'Restore'){ - title = _('Restore Export Data'); - } else if (result_task_type === 'Stop Restore') { - title = _('Stop Restore Export Data'); - } + var title = _('Restore Export Data'); $osf.growl(title, _(result['message']), 'danger', 0); } } @@ -1714,49 +1703,12 @@ function checkTaskStatus(task_id, task_type) { enableRestoreFunction(); var data = jqXHR.responseJSON; if (data && data['result']) { - var title = ''; - if (task_type === 'Restore'){ - title = _('Restore Export Data'); - } else if (task_type === 'Stop Restore') { - title = _('Stop Restore Export Data'); - } + var title = _('Restore Export Data'); $osf.growl(title, _(data['result']), 'danger', 0); } }); } -// Catch event when click Restore button in modal on the DataInformation screen -$('#start_restore_modal_button').on('click', function () { - var data = {}; - data['destination_id'] = $('#destination_storage').val(); - data['is_from_confirm_dialog'] = true; - // Call enableStopRestoreFunction() when click Restore button - closeGrowl(); - $.ajax({ - url: 'restore_export_data/', - type: 'post', - data: data - }).done(function (response) { - // Get task_id when call ajax successful - restore_task_id = response['task_id']; - if (!restore_task_id) { - return; - } - enableStopRestoreFunction(); - setTimeout(function () { - checkTaskStatus(restore_task_id, 'Restore'); - }, intervalCheckStatus); - }).fail(function (jqXHR) { - // Call enableRestoreFunction() when fail - enableRestoreFunction(); - var data = jqXHR.responseJSON; - if (data && data['message']) { - $osf.growl(_('Restore Export Data'), _(data['message']), 'danger', growlBoxDelay); - } - }); -}); - - // Start - Check Restore exported data - Actions $('#check_restore_button').on('click', function () { diff --git a/admin/templates/rdm_custom_storage_location/export_data_information.html b/admin/templates/rdm_custom_storage_location/export_data_information.html index f41811dfe31..a921791e298 100644 --- a/admin/templates/rdm_custom_storage_location/export_data_information.html +++ b/admin/templates/rdm_custom_storage_location/export_data_information.html @@ -323,31 +323,6 @@

{% trans "File list does not exist on the stora - -