Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,8 @@
}
],
"wk:itemLinks": [],
"wk:metadataAutoFill": true
"wk:metadataAutoFill": true,
"wk:researchmapLinkage": true
},
{
"@id": "ro-crate-metadata.json",
Expand Down
279 changes: 279 additions & 0 deletions modules/weko-search-ui/tests/test_mapper.py

Large diffs are not rendered by default.

15 changes: 10 additions & 5 deletions modules/weko-search-ui/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1064,14 +1064,17 @@ def test_handle_validate_item_import(app, mocker_itemtype, mocker):
}

list_record = [
{
{
"metadata": {
'item_xxx': {
'subitem_yyy':[
'subitem_yyy':[
{"subitem_zzz": 123}
]
}
}
},
"warnings": [
"existing_warning"
]
}
]
with app.test_request_context():
Expand All @@ -1081,6 +1084,7 @@ def test_handle_validate_item_import(app, mocker_itemtype, mocker):
target = list_record[0]["metadata"]['item_xxx']['subitem_yyy'][0]["subitem_zzz"]
assert any("Replace value of" in w for w in warnings)
assert any("is different from existing" in w for w in warnings)
assert any("existing_warning" in w for w in warnings)
assert type(target) == str

list_record[0]["metadata"]['item_xxx']['subitem_yyy'][0]["subitem_zzz"] = 456
Expand All @@ -1091,6 +1095,7 @@ def test_handle_validate_item_import(app, mocker_itemtype, mocker):
warnings = result[0].get("warnings", [])
assert any("へ置き換えました。" in w for w in warnings)
assert any("と異なっています。" in w for w in warnings)
assert any("existing_warning" in w for w in warnings)

schema = {
"type": "object",
Expand All @@ -1112,7 +1117,7 @@ def test_handle_validate_item_import(app, mocker_itemtype, mocker):
}

list_record = [
{
{
"metadata": {
'item_aaa': {
'subitem_bbb': {
Expand All @@ -1126,7 +1131,7 @@ def test_handle_validate_item_import(app, mocker_itemtype, mocker):
with set_locale("en"):
result = handle_validate_item_import(list_record, schema)
assert "errors" in result[0]

with app.test_request_context():
with set_locale("ja"):
result = handle_validate_item_import(list_record, schema)
Expand Down
6 changes: 6 additions & 0 deletions modules/weko-search-ui/weko_search_ui/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,12 @@
WEKO_SEARCH_UI_BULK_EXPORT_RETRY_INTERVAL = 1
""" retry interval(sec) """

WEKO_SEARCH_UI_IMPORT_REPLACE_RULES = {}
"""Strings to be replaced during item import."""

WEKO_SEARCH_UI_IMPORT_REPLACE_RULE_MAP = {}
"""Mapping of jsonld_mappings table 'id' to replacement rule keys."""

CELERY_RESULT_PERSISTENT = True
""" If set to True, result messages will be persistent. This means the messages will not be lost after a broker restart. The default is for the results to be transient."""
CELERY_TASK_TRACK_STARTED=True
Expand Down
118 changes: 118 additions & 0 deletions modules/weko-search-ui/weko_search_ui/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from urllib.parse import urlparse

from flask import current_app, url_for
from flask_babelex import lazy_gettext as _

from invenio_pidstore.models import PersistentIdentifier
from weko_records.api import (
Expand Down Expand Up @@ -1380,6 +1381,114 @@ def find_similar_key(target_key, threshold=0.8):

return errors if errors else None

def apply_import_replace_rules(self, metadata, info):
    """Apply configured string-replacement rules to import metadata.

    Looks up the rule keys for this mapper's ``mapping_id`` in
    ``WEKO_SEARCH_UI_IMPORT_REPLACE_RULE_MAP`` and applies each referenced
    rule from ``WEKO_SEARCH_UI_IMPORT_REPLACE_RULES``. A rule is a dict
    with ``from`` (non-empty str), ``to`` (str), ``target_path`` (list of
    metadata key paths; compared with list indices such as ``[0]``
    stripped) and optional ``is_regex`` (bool).

    Failures never abort the import: invalid configuration and per-rule
    errors are logged and collected into ``info["warnings"]``.

    Args:
        metadata (dict): Flattened metadata (key path -> value), rewritten
            in place.
        info (dict): System information dict; warnings are appended to
            ``info["warnings"]``.

    Returns:
        tuple: ``(metadata, info)`` after applying the rules.
    """
    warning_list = []
    try:
        mapping_id = self.mapping_id

        rules = current_app.config.get(
            "WEKO_SEARCH_UI_IMPORT_REPLACE_RULES", {})
        rule_map = current_app.config.get(
            "WEKO_SEARCH_UI_IMPORT_REPLACE_RULE_MAP", {})
        if not (isinstance(rules, dict) and isinstance(rule_map, dict)):
            raise ValueError(
                _("The type of the jsonld mapping replacement rule is invalid."))

        rule_keys = rule_map.get(str(mapping_id), [])
        if not isinstance(rule_keys, list):
            raise ValueError(
                _("The type of the jsonld mapping replacement rule is invalid."))

        for rule_id in rule_keys:
            if rule_id not in rules:
                warning_list.append(
                    _("Required replacement rule: '%(rule_id)s' is missing.",
                      rule_id=rule_id))
                continue

            rule = rules.get(rule_id, None)
            if not isinstance(rule, dict):
                warning_list.append(
                    _("Replacement rule: '%(rule_id)s' is invalid.",
                      rule_id=rule_id))
                continue

            from_str = rule.get("from", None)
            to_str = rule.get("to", None)
            target_path_list = rule.get("target_path", [])

            if not (
                isinstance(from_str, str) and
                from_str != "" and
                isinstance(to_str, str) and
                isinstance(target_path_list, list)
            ):
                warning_list.append(
                    _("Replacement rule: '%(rule_id)s' is invalid.",
                      rule_id=rule_id))
                continue

            is_regex = rule.get("is_regex", False)
            if not isinstance(is_regex, bool):
                warning_list.append(
                    _("Replacement rule: '%(rule_id)s' - 'is_regex' is "
                      "not boolean. Treated as False.",
                      rule_id=rule_id))
                is_regex = False

            try:
                for path_key in target_path_list:
                    for meta_key in list(metadata.keys()):
                        # Compare with list indices (e.g. "[0]") removed so
                        # a single rule path matches every indexed
                        # occurrence of that key.
                        meta_key_no_index = re.sub(r'\[\d+\]', '', meta_key)
                        if meta_key_no_index != path_key:
                            continue
                        metadata_value = metadata[meta_key]
                        # Only string values can be replaced; skip others
                        # instead of aborting the whole rule run.
                        if not isinstance(metadata_value, str):
                            continue
                        # BUGFIX: write back under `meta_key` (the actual,
                        # possibly indexed key we read from), not the
                        # index-stripped `path_key`, which would leave the
                        # original value untouched and add a bogus key.
                        if is_regex:
                            # The lambda keeps `to_str` literal: no group
                            # backreference / escape expansion by re.sub.
                            metadata[meta_key] = re.sub(
                                from_str, lambda m: to_str, metadata_value)
                        else:
                            metadata[meta_key] = metadata_value.replace(
                                from_str, to_str)
            except re.error as e:
                warning_list.append(
                    _("Replacement rule: '%(rule_id)s' - "
                      "regex error: %(error)s",
                      rule_id=rule_id, error=str(e)))

        if warning_list:
            # Funnel collected warnings through the single handler below.
            raise ValueError(warning_list)
        return metadata, info

    except Exception as e:
        info_warnings = info.get("warnings", [])
        # Guard `e.args` before indexing: an args-less exception must not
        # raise IndexError inside the handler.
        if (isinstance(e, ValueError) and e.args
                and isinstance(e.args[0], list)):
            for warn in e.args[0]:
                # str() first: `warn` may be a lazy translation proxy.
                if 'is_regex' in str(warn):
                    # The is_regex warning already reads as a full message.
                    warning_message = str(warn)
                else:
                    warning_message = str(_(
                        "Replacement failed.: %(warn)s", warn=warn))
                current_app.logger.warning(warning_message)
                info_warnings.append(warning_message)
        else:
            warning_message = str(_(
                "Replacement failed.: %(warning)s", warning=e))
            current_app.logger.warning(warning_message)
            info_warnings.append(warning_message)
        info["warnings"] = info_warnings
        return metadata, info

def to_item_metadata(self, json_ld):
"""Map to item type metadata.

Expand Down Expand Up @@ -1458,6 +1567,10 @@ def is_url(s: str) -> bool:
"warnings": [],
}

# Execute replacement process for metadata
metadata, system_info = \
self.apply_import_replace_rules(metadata, system_info)

missing_metadata = {}

from flask_babelex import gettext as _
Expand Down Expand Up @@ -1733,6 +1846,8 @@ def _resolve_link(value):
]
system_info["save_as_is"] = extracted.get("wk:saveAsIs", False)
system_info["metadata_replace"] = extracted.get("wk:metadataReplace", False)
system_info["researchmap_linkage"] = extracted.get(
"wk:researchmapLinkage", False)

for relation in extracted.get("jpcoar:relation", []):
relation_id = relation.get("jpcoar:relatedIdentifier") or {}
Expand Down Expand Up @@ -2426,6 +2541,9 @@ def dereference(keys, initial_entity=None):
# wk:metadaAutoFill
rocrate.root_dataset["wk:metadataAutoFill"] = False

# wk:researchmapLinkage
rocrate.root_dataset["wk:researchmapLinkage"] = False

return rocrate


Expand Down
Binary file not shown.
Loading
Loading