diff --git a/REUSE.toml b/REUSE.toml
index b3033158..c81fab97 100644
--- a/REUSE.toml
+++ b/REUSE.toml
@@ -17,3 +17,9 @@ path = ["REUSE.toml"]
precedence = "aggregate"
SPDX-FileCopyrightText = "German Aerospace Center (DLR), Helmholtz-Zentrum Dresden-Rossendorf, Forschungszentrum Jülich"
SPDX-License-Identifier = "CC0-1.0"
+
+[[annotations]]
+path = ["src/**/*.py", "test/**/*.py"]
+precedence = "aggregate"
+SPDX-FileCopyrightText = "German Aerospace Center (DLR), Helmholtz-Zentrum Dresden-Rossendorf, Forschungszentrum Jülich"
+SPDX-License-Identifier = "Apache-2.0"
\ No newline at end of file
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 9ee616da..7ab7b582 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -102,7 +102,7 @@ def read_version_from_pyproject():
'sphinx_togglebutton',
'sphinxcontrib.datatemplates',
# Custom extensions, see `_ext` directory.
- 'plugin_markup',
+ # 'plugin_markup',
]
language = 'en'
diff --git a/docs/source/dev/data_model.md b/docs/source/dev/data_model.md
index a5f4c727..9077b0d3 100644
--- a/docs/source/dev/data_model.md
+++ b/docs/source/dev/data_model.md
@@ -1,27 +1,286 @@
-# HERMES Data Model
+# Data model
-*hermes* uses an internal data model to store the output of the different stages.
-All the data is collected in a directory called `.hermes` located in the root of the project directory.
+`hermes`' internal data model acts like a contract between `hermes` and plugins.
+It is based on [**JSON-LD (JSON Linked Data)**](https://json-ld.org/), and
+the public API simplifies interaction with the data model through Python code.
-You should not need to interact with this data directly.
-Instead, use {class}`hermes.model.context.HermesContext` and respective subclasses to access the data in a consistent way.
+Output of the different `hermes` commands consequently is valid JSON-LD, serialized as JSON, that is cached in
+subdirectories of the `.hermes/` directory that is created in the root of the project directory.
+The cache is purely for internal purposes; its data should not be interacted with.
-## Harvest Data
+Depending on whether you develop a plugin for `hermes`, or you develop `hermes` itself, you need to know either [_some_](#json-ld-for-plugin-developers),
+or _quite a few_ things about JSON-LD.
-The data of the havesters is cached in the sub-directory `.hermes/harvest`.
-Each harvester has a separate cache file to allow parallel harvesting.
-The cache file is encoded in JSON and stored in `.hermes/harvest/HARVESTER_NAME.json`
-where `HARVESTER_NAME` corresponds to the entry point name.
+The following sections provide documentation of the data model.
+They aim to help you get started with `hermes` plugin and core development,
+even if you have no previous experience with JSON-LD.
-{class}`hermes.model.context.HermesHarvestContext` encapsulates these harvester caches.
+## The data model for plugin developers
+
+If you develop a plugin for `hermes`, you will only need to work with a single Python class and the public API
+it provides: {class}`hermes.model.SoftwareMetadata`.
+
+To work with this class, it is necessary that you know _some_ things about JSON-LD.
+
+### JSON-LD for plugin developers
+
+```{attention}
+Work in progress.
+```
+
+
+### Working with the `hermes` data model in plugins
+
+> **Goal**
+> Understand how plugins access the `hermes` data model and interact with it.
+
+`hermes` aims to hide as much of the data model as possible behind a public API
+to avoid that plugin developers have to deal with some of the more complex features of JSON-LD.
+
+#### Model instances in different types of plugin
+
+You can extend `hermes` with plugins for three different commands: `harvest`, `curate`, `deposit`.
+
+The commands differ in how they work with instances of the data model.
+
+- `harvest` plugins _create_ a single new model instance and return it.
+- `curate` plugins are passed a single existing model instance (the output of `process`),
+and return a single model instance.
+- `deposit` plugins are passed a single existing model instance (the output of `curate`),
+and return a single model instance.
+
+#### How plugins work with the API
+
+```{important}
+Plugins access the data model _exclusively_ through the API class {class}`hermes.model.SoftwareMetadata`.
+```
+
+The following sections show how this class works.
+
+##### Creating a data model instance
+
+Model instances are primarily created in `harvest` plugins, but may also be created in other plugins to map
+existing data into.
+
+To create a new model instance, initialize {class}`hermes.model.SoftwareMetadata`:
+
+```{code-block} python
+:caption: Initializing a default data model instance
+from hermes.model import SoftwareMetadata
+
+data = SoftwareMetadata()
+```
+
+`SoftwareMetadata` objects initialized without arguments provide the default _context_
+(see [_JSON-LD for plugin developers_](#json-ld-for-plugin-developers)).
+This means that now, you can use terms from the schemas included in the default context to describe software metadata.
+
+Terms from [_CodeMeta_](https://codemeta.github.io/terms/) can be used without a prefix:
+
+```{code-block} python
+:caption: Using terms from the default schema
+data["readme"] = ...
+```
+
+Terms from [_Schema.org_](https://schema.org/) can be used with the prefix `schema`:
+
+```{code-block} python
+:caption: Using terms from a non-default schema
+data["schema:copyrightNotice"] = ...
+```
+
+You can also use other linked data vocabularies. To do this, you need to identify them with a prefix and register them
+with the data model by passing it `extra_vocabs` as a `dict` mapping prefixes to URLs where the vocabularies are
+provided as JSON-LD:
+
+```{code-block} python
+:caption: Injecting additional schemas
+from hermes.model import SoftwareMetadata
+
+# Contents served at https://bar.net/schema.jsonld:
+# {
+# "@context":
+# {
+# "name": "https://schema.org/name"
+# }
+# }
+
+data = SoftwareMetadata(extra_vocabs={"foo": "https://bar.net/schema.jsonld"})
+
+data["foo:name"] = ...
+```
+
+##### Adding data
+
+Once you have an instance of {class}`hermes.model.SoftwareMetadata`, you can add data to it,
+i.e., metadata that describes software:
+
+```{code-block} python
+:caption: Setting data values
+data["name"] = "My Research Software" # A simple "Text"-type value
+# → Simplified model representation : { "name": [ "My Research Software" ] }
+# Cf. "Accessing data" below
+data["author"] = {"name": "Shakespeare"} # An object value that uses terms available in the defined context
+# → Simplified model representation : { "name": [ "My Research Software" ], "author": [ { "name": "Shakespeare" } ] }
+# Cf. "Accessing data" below
+```
+
+##### Accessing data
+
+You need to be able to access data in the data model instance to add, edit or remove data.
+Data can be accessed by using term strings, similar to how values in Python `dict`s are accessed by keys.
+
+```{important}
+When you access data from a data model instance,
+it will always be returned in a **list**-like object!
+```
+
+The reason for providing data in list-like objects is that JSON-LD treats all property values as arrays.
+Even if you add "single value" data to a `hermes` data model instance via the API, the underlying JSON-LD model
+will treat it as an array, i.e., a list-like object:
+
+```{code-block} python
+:caption: Internal data values are arrays
+data["name"] = "My Research Software" # → [ "My Research Software" ]
+data["author"] = {"name": "Shakespeare"} # → [ { "name": [ "Shakespeare" ] } ]
+```
+
+Therefore, you access data in the same way you would access data from a Python `list`:
+
+1. You access single values using indices, e.g., `data["name"][0]`.
+2. You can use a list-like API to interact with data objects, e.g.,
+`data["name"].append("Hamilton")`, `data["name"].extend(["Hamilton", "Knuth"])`, `for name in data["name"]: ...`, etc.
+
+##### Interacting with data
+
+The following longer example shows different ways that you can interact with `SoftwareMetadata` objects and the data API.
+
+```{code-block} python
+:caption: Building the data model
+from hermes.model import SoftwareMetadata
+
+# Create the model object with the default context
+data = SoftwareMetadata()
+
+# Let's create author metadata for our software!
+# Below each line of code, the value of `data["author"]` is given.
+
+data["author"] = {"name": "Shakespeare"}
+# → [{'name': ['Shakespeare']}]
+
+data["author"].append({"name": "Hamilton"})
+# → [{'name': ['Shakespeare']}, {'name': ['Hamilton']}]
+
+data["author"][0]["email"] = "shakespeare@baz.net"
+# → [{'name': ['Shakespeare'], 'email': ['shakespeare@baz.net']}, {'name': ['Hamilton']}]
+
+data["author"][1]["email"].append("hamilton@baz.net")
+# → [{'name': ['Shakespeare'], 'email': ['shakespeare@baz.net']}, {'name': ['Hamilton'], 'email': ['hamilton@baz.net']}]
+
+data["author"][1]["email"].extend(["hamilton@spam.org", "hamilton@eggs.com"])
+# → [
+#   {'name': ['Shakespeare'], 'email': ['shakespeare@baz.net']},
+#   {'name': ['Hamilton'], 'email': ['hamilton@baz.net', 'hamilton@spam.org', 'hamilton@eggs.com']}
+# ]
+```
+
+The example continues to show how to iterate through data.
+
+```{code-block} python
+:caption: for-loop, containment check
+for i, author in enumerate(data["author"], start=1):
+ if author["name"][0] in ["Shakespeare", "Hamilton"]:
+ print(f"Author {i} has expected name.")
+ else:
+ raise ValueError("Unexpected author name found!", author["name"][0])
+
+# Mock output:
+# $> Author 1 has expected name.
+# $> Author 2 has expected name.
+```
+
+```{code-block} python
+:caption: Value check
+for email in data["author"][0]["email"]:
+ if email.endswith(".edu"):
+ print("Shakespeare has an email address at an educational institution.")
+ else:
+ print("Cannot confirm affiliation with educational institution for Shakespeare.")
+
+# Mock output
+# $> Cannot confirm affiliation with educational institution for Shakespeare.
+```
+
+```{code-block} python
+:caption: Value check and list comprehension
+if all(["hamilton" in email for email in data["author"][1]["email"]]):
+ print("Author has only emails with their name in it.")
+
+# Mock output
+# $> Author has only emails with their name in it.
+```
+
+The example continues to show how to assert data values.
+
+As mentioned in the [introduction to the data model](#data-model),
+`hermes` uses a JSON-LD-like internal data model.
+The API class {class}`hermes.model.SoftwareMetadata` hides many
+of the more complex aspects of JSON-LD and makes it easy to work
+with the data model.
+
+So the API class hides the internal model objects.
+Therefore, they work as you would expect from plain
+Python data:
+
+```{code-block} python
+:caption: Naive containment assertion that succeeds
+:emphasize-lines: 5,13
+try:
+ assert (
+ {'name': ['Shakespeare'], 'email': ['shakespeare@baz.net']}
+ in
+ data["author"]
+ )
+ print("The author was found!")
+except AssertionError:
+ print("The author could not be found.")
+ raise
+
+# Mock output
+# $> The author was found!
+#
+#
+# Internal Model from data["author"]:
+# {'@list': [
+# {
+# 'http://schema.org/name': [{'@value': 'Shakespeare'}],
+# 'http://schema.org/email': [{'@value': 'shakespeare@baz.net'}]
+# },
+# {
+# 'http://schema.org/name': [{'@value': 'Hamilton'}],
+# 'http://schema.org/email': [
+# {'@list': [
+# {'@value': 'hamilton@baz.net'}, {'@value': 'hamilton@spam.org'}, {'@value': 'hamilton@eggs.com'}
+# ]}
+# ]
+# }]
+# }
+# )
+```
+
+---
+
+## See Also
+
+- API reference: {class}`hermes.model.SoftwareMetadata`
diff --git a/poetry.lock b/poetry.lock
index 74361955..27b40260 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.
[[package]]
name = "accessible-pygments"
@@ -927,7 +927,7 @@ version = "3.0.2"
description = "Safely add untrusted strings to HTML/XML markup."
optional = false
python-versions = ">=3.9"
-groups = ["dev", "docs"]
+groups = ["docs"]
files = [
{file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"},
{file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"},
@@ -1574,21 +1574,6 @@ pytest = ">=4.6"
[package.extras]
testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"]
-[[package]]
-name = "pytest-httpserver"
-version = "1.1.3"
-description = "pytest-httpserver is a httpserver for pytest"
-optional = false
-python-versions = ">=3.9"
-groups = ["dev"]
-files = [
- {file = "pytest_httpserver-1.1.3-py3-none-any.whl", hash = "sha256:5f84757810233e19e2bb5287f3826a71c97a3740abe3a363af9155c0f82fdbb9"},
- {file = "pytest_httpserver-1.1.3.tar.gz", hash = "sha256:af819d6b533f84b4680b9416a5b3f67f1df3701f1da54924afd4d6e4ba5917ec"},
-]
-
-[package.dependencies]
-Werkzeug = ">=2.0.0"
-
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"
@@ -1699,14 +1684,14 @@ files = [
[[package]]
name = "rdflib"
-version = "7.1.4"
+version = "7.5.0"
description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information."
optional = false
-python-versions = "<4.0.0,>=3.8.1"
+python-versions = ">=3.8.1"
groups = ["main"]
files = [
- {file = "rdflib-7.1.4-py3-none-any.whl", hash = "sha256:72f4adb1990fa5241abd22ddaf36d7cafa5d91d9ff2ba13f3086d339b213d997"},
- {file = "rdflib-7.1.4.tar.gz", hash = "sha256:fed46e24f26a788e2ab8e445f7077f00edcf95abb73bcef4b86cefa8b62dd174"},
+ {file = "rdflib-7.5.0-py3-none-any.whl", hash = "sha256:b011dfc40d0fc8a44252e906dcd8fc806a7859bc231be190c37e9568a31ac572"},
+ {file = "rdflib-7.5.0.tar.gz", hash = "sha256:663083443908b1830e567350d72e74d9948b310f827966358d76eebdc92bf592"},
]
[package.dependencies]
@@ -1719,6 +1704,7 @@ html = ["html5rdf (>=1.2,<2)"]
lxml = ["lxml (>=4.3,<6.0)"]
networkx = ["networkx (>=2,<4)"]
orjson = ["orjson (>=3.9.14,<4)"]
+rdf4j = ["httpx (>=0.28.1,<0.29.0)"]
[[package]]
name = "requests"
@@ -2454,24 +2440,6 @@ h2 = ["h2 (>=4,<5)"]
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
zstd = ["zstandard (>=0.18.0)"]
-[[package]]
-name = "werkzeug"
-version = "3.1.3"
-description = "The comprehensive WSGI web application library."
-optional = false
-python-versions = ">=3.9"
-groups = ["dev"]
-files = [
- {file = "werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e"},
- {file = "werkzeug-3.1.3.tar.gz", hash = "sha256:60723ce945c19328679790e3282cc758aa4a6040e4bb330f53d30fa546d44746"},
-]
-
-[package.dependencies]
-MarkupSafe = ">=2.1.1"
-
-[package.extras]
-watchdog = ["watchdog (>=2.3)"]
-
[[package]]
name = "wheel"
version = "0.45.1"
@@ -2579,4 +2547,4 @@ files = [
[metadata]
lock-version = "2.1"
python-versions = ">=3.10, <4.0.0"
-content-hash = "2e2405b30c3dee4416a6e77828c7cff1197a8be71665770bcbdb308c19ef4358"
+content-hash = "e76de51d1f5dd86486d4cc24a5cdf7d007b16ce5d9d0cc3f7d0f353cf0defff0"
diff --git a/pyproject.toml b/pyproject.toml
index 49b64ee7..f9588a75 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -78,7 +78,6 @@ pytest-cov = "^3.0.0"
taskipy = "^1.10.3"
flake8 = "^5.0.4"
requests-mock = "^1.10.0"
-pytest-httpserver = "^1.1.3"
# Packages for developers for creating documentation
[tool.poetry.group.docs]
diff --git a/src/hermes/model/__init__.py b/src/hermes/model/__init__.py
index faf5a2f5..4a4bca25 100644
--- a/src/hermes/model/__init__.py
+++ b/src/hermes/model/__init__.py
@@ -1,3 +1,5 @@
# SPDX-FileCopyrightText: 2022 German Aerospace Center (DLR)
#
# SPDX-License-Identifier: Apache-2.0
+
+from hermes.model.api import SoftwareMetadata
diff --git a/src/hermes/model/api.py b/src/hermes/model/api.py
new file mode 100644
index 00000000..8b079544
--- /dev/null
+++ b/src/hermes/model/api.py
@@ -0,0 +1,10 @@
+from hermes.model.types import ld_dict
+
+from hermes.model.types.ld_context import ALL_CONTEXTS
+
+
+class SoftwareMetadata(ld_dict):
+
+ def __init__(self, data: dict = None, extra_vocabs: dict[str, str] = None) -> None:
+ ctx = ALL_CONTEXTS + [{**extra_vocabs}] if extra_vocabs is not None else ALL_CONTEXTS
+ super().__init__([ld_dict.from_dict(data, context=ctx).data_dict if data else {}], context=ctx)
diff --git a/src/hermes/model/types/__init__.py b/src/hermes/model/types/__init__.py
index 8ab05171..df6aed71 100644
--- a/src/hermes/model/types/__init__.py
+++ b/src/hermes/model/types/__init__.py
@@ -5,29 +5,19 @@
# SPDX-FileContributor: Michael Meinel
# SPDX-FileContributor: Michael Fritzsche
-from datetime import date, time, datetime
-
from .ld_container import ld_container
-from .ld_list import ld_list
from .ld_dict import ld_dict
-from .ld_context import iri_map
+from .ld_list import ld_list
from .pyld_util import JsonLdProcessor
_TYPEMAP = [
- # Conversion routines for ld_container
- (
- lambda c: isinstance(c, ld_container),
- {
- "ld_container": lambda c, **_: c,
- "json": lambda c, **_: c.compact(),
- "expanded_json": lambda c, **_: c.ld_value,
- }
- ),
+ # Conversion routine for ld_container
+ (lambda c: isinstance(c, ld_container), {"ld_container": lambda c, **_: c}),
# Wrap item from ld_dict in ld_list
(ld_list.is_ld_list, {"ld_container": ld_list}),
- (lambda c: isinstance(c, list), {"ld_container": lambda c, **kw: ld_list(c, **kw)}),
+ (lambda c: isinstance(c, list), {"ld_container": ld_list}),
# pythonize items from lists (expanded set is already handled above)
(ld_container.is_json_id, {"python": lambda c, **_: c["@id"]}),
@@ -36,35 +26,6 @@
(ld_list.is_container, {"ld_container": lambda c, **kw: ld_list([c], **kw)}),
(ld_dict.is_json_dict, {"ld_container": lambda c, **kw: ld_dict([c], **kw)}),
(lambda v: isinstance(v, str), {"python": lambda v, parent, **_: parent.ld_proc.compact_iri(parent.active_ctx, v)}),
-
- # Convert internal data types to expanded_json
- (ld_container.is_json_id, {"expanded_json": lambda c, **_: [c]}),
- (ld_container.is_ld_id, {"expanded_json": lambda c, **_: c}),
- (ld_container.is_json_value, {"expanded_json": lambda c, **_: [c]}),
- (ld_container.is_ld_value, {"expanded_json": lambda c, **_: c}),
- (ld_dict.is_json_dict, {"expanded_json": lambda c, **kw: ld_dict.from_dict(c, **kw).ld_value}),
- (
- ld_list.is_container,
- {"expanded_json": lambda c, **kw: ld_list.from_list(ld_list.get_item_list_from_container(c), **kw).ld_value}
- ),
- (
- ld_list.is_ld_list,
- {"expanded_json": lambda c, **kw: ld_list.from_list(ld_list.get_item_list_from_container(c[0]), **kw).ld_value}
- ),
- (lambda c: isinstance(c, list), {"expanded_json": lambda c, **kw: ld_list.from_list(c, **kw).ld_value}),
- (lambda v: isinstance(v, (int, float, str, bool)), {"expanded_json": lambda v, **_: [{"@value": v}]}),
- (
- lambda v: isinstance(v, datetime),
- {"expanded_json": lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:DateTime"]}]}
- ),
- (
- lambda v: isinstance(v, date),
- {"expanded_json": lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:Date"]}]}
- ),
- (
- lambda v: isinstance(v, time),
- {"expanded_json": lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:Time"]}]}
- ),
]
diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py
index 88d92795..c8ab051f 100644
--- a/src/hermes/model/types/ld_container.py
+++ b/src/hermes/model/types/ld_container.py
@@ -5,28 +5,37 @@
# SPDX-FileContributor: Michael Meinel
# SPDX-FileContributor: Michael Fritzsche
-from .pyld_util import JsonLdProcessor, bundled_loader
-
-from datetime import date, time, datetime
-from typing import Union, Self, Any
+from __future__ import annotations
-
-JSON_LD_CONTEXT_DICT = dict[str, Union[str, "JSON_LD_CONTEXT_DICT"]]
-BASIC_TYPE = Union[str, float, int, bool]
-EXPANDED_JSON_LD_VALUE = list[dict[str, Union["EXPANDED_JSON_LD_VALUE", BASIC_TYPE]]]
-COMPACTED_JSON_LD_VALUE = Union[
- list[Union[dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], BASIC_TYPE]],
- dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]],
-]
-TIME_TYPE = Union[datetime, date, time]
-JSON_LD_VALUE = Union[
- list[Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, "ld_container"]],
- dict[str, Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, "ld_container"]],
-]
-PYTHONIZED_LD_CONTAINER = Union[
- list[Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]],
- dict[str, Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]],
-]
+from .pyld_util import JsonLdProcessor, bundled_loader
+from datetime import date, datetime, time
+
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+ from .ld_dict import ld_dict
+ from .ld_list import ld_list
+ from typing import Any, TypeAlias, Union
+ from typing_extensions import Self
+ JSON_LD_CONTEXT_DICT: TypeAlias = dict[str, Union[str, "JSON_LD_CONTEXT_DICT"]]
+ BASIC_TYPE: TypeAlias = Union[str, float, int, bool]
+ EXPANDED_JSON_LD_VALUE: TypeAlias = list[Union[
+ dict[str, Union["EXPANDED_JSON_LD_VALUE", BASIC_TYPE]],
+ "EXPANDED_JSON_LD_VALUE",
+ str
+ ]]
+ COMPACTED_JSON_LD_VALUE: TypeAlias = Union[
+ list[Union[dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], BASIC_TYPE]],
+ dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]],
+ ]
+ TIME_TYPE: TypeAlias = Union[datetime, date, time]
+ JSON_LD_VALUE: TypeAlias = Union[
+ list[Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]],
+ dict[str, Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]],
+ ]
+ PYTHONIZED_LD_CONTAINER: TypeAlias = Union[
+ list[Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]],
+ dict[str, Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]],
+ ]
class ld_container:
@@ -64,7 +73,7 @@ def __init__(
self: Self,
data: EXPANDED_JSON_LD_VALUE,
*,
- parent: Union["ld_container", None] = None,
+ parent: Union[ld_dict, ld_list, None] = None,
key: Union[str, None] = None,
index: Union[int, None] = None,
context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None,
@@ -77,7 +86,7 @@ def __init__(
:param data: The expanded json-ld data that is mapped.
:type data: EXPANDED_JSON_LD_VALUE
:param parent: parent node of this container.
- :type parent: ld_container | None
+ :type parent: ld_dict | ld_list | None
:param key: key into the parent container.
:type key: str | None
:param index: index into the parent container.
@@ -177,7 +186,7 @@ def ld_value(self: Self) -> EXPANDED_JSON_LD_VALUE:
def _to_python(
self: Self, full_iri: str, ld_value: Union[list, dict, str]
- ) -> Union["ld_container", BASIC_TYPE, TIME_TYPE]:
+ ) -> Union[ld_dict, ld_list, BASIC_TYPE, TIME_TYPE]:
"""
Returns a pythonized version of the given value pretending the value is in self and full_iri its key.
@@ -190,7 +199,7 @@ def _to_python(
:type ld_value: list | dict | str
:return: The pythonized value of the ld_value.
- :rtype: ld_container | BASIC_TYPE | TIME_TYPE
+ :rtype: ld_dict | ld_list | BASIC_TYPE | TIME_TYPE
"""
if full_iri == "@id":
# values of key "@id" only have to be compacted
@@ -237,7 +246,7 @@ def _to_expanded_json(
# while searching build a path such that it leads from the found ld_dicts ld_value to selfs data_dict/ item_list
parent = self
path = []
- while parent.__class__.__name__ != "ld_dict":
+ while parent.__class__.__name__ not in {"ld_dict", "SoftwareMetadata"}:
if parent.container_type == "@list":
path.extend(["@list", 0])
elif parent.container_type == "@graph":
@@ -250,7 +259,7 @@ def _to_expanded_json(
# if neither self nor any of its parents is a ld_dict:
# create a dict with the key of the outer most parent of self and this parents ld_value as a value
# this dict is stored in an ld_container and simulates the most minimal JSON-LD object possible
- if parent.__class__.__name__ != "ld_dict":
+ if parent.__class__.__name__ not in {"ld_dict", "SoftwareMetadata"}:
key = self.ld_proc.expand_iri(parent.active_ctx, parent.key)
parent = ld_container([{key: parent._data}])
path.append(0)
diff --git a/src/hermes/model/types/ld_dict.py b/src/hermes/model/types/ld_dict.py
index 589e5246..a6b9de09 100644
--- a/src/hermes/model/types/ld_dict.py
+++ b/src/hermes/model/types/ld_dict.py
@@ -5,42 +5,179 @@
# SPDX-FileContributor: Michael Meinel
# SPDX-FileContributor: Michael Fritzsche
-from .ld_container import ld_container
+from __future__ import annotations
from .pyld_util import bundled_loader
+from .ld_container import ld_container
+
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+ from collections.abc import Generator, Iterator, KeysView
+ from .ld_container import (
+ JSON_LD_CONTEXT_DICT,
+ EXPANDED_JSON_LD_VALUE,
+ PYTHONIZED_LD_CONTAINER,
+ JSON_LD_VALUE,
+ TIME_TYPE,
+ BASIC_TYPE,
+ )
+ from .ld_list import ld_list
+ from typing import Any, Union, Literal
+ from typing_extensions import Self
class ld_dict(ld_container):
+ """
+    A JSON-LD container resembling a dict.
+ See also :class:`ld_container`
+
+ :ivar ref: A dict used to reference this object by its id. (Its form is {"@id": ...})
+ :ivartype ref: dict[Literal["@id"], str]
+
+    :cvar _NO_DEFAULT: A type used as a placeholder to represent "no default".
+    :cvartype _NO_DEFAULT: type[str]
+ """
_NO_DEFAULT = type("NO DEFAULT")
- def __init__(self, data, *, parent=None, key=None, index=None, context=None):
+ def __init__(
+ self: Self,
+ data: list[dict[str, EXPANDED_JSON_LD_VALUE]],
+ *,
+ parent: Union[ld_dict, ld_list, None] = None,
+ key: Union[str, None] = None,
+ index: Union[int, None] = None,
+ context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None
+ ) -> None:
+ """
+ Create a new instance of an ld_dict.
+
+ :param self: The instance of ld_container to be initialized.
+ :type self: Self
+ :param data: The expanded json-ld data that is mapped.
+ :type data: EXPANDED_JSON_LD_VALUE
+ :param parent: parent node of this container.
+ :type parent: ld_dict | ld_list | None
+ :param key: key into the parent container.
+ :type key: str | None
+ :param index: index into the parent container.
+ :type index: int | None
+ :param context: local context for this container.
+ :type context: list[str | JSON_LD_CONTEXT_DICT] | None
+
+ :return:
+ :rtype: None
+
+ :raises ValueError: If the given data doesn't represent an ld_dict.
+ """
+ # check for validity of data
if not self.is_ld_dict(data):
raise ValueError("The given data does not represent a ld_dict.")
+ self.data_dict = data[0]
+ # call super constructor
super().__init__(data, parent=parent, key=key, index=index, context=context)
- self.data_dict = data[0]
+ def __getitem__(self: Self, key: str) -> ld_list:
+ """
+ Get the item with the given key in a pythonized form.
+ If self contains no key, value pair with the given key, then an empty list is added as its value and returned.
+
+ :param self: The ld_dict the item is taken from.
+ :type self: ld_dict
+ :param key: The key (compacted or expanded) to the item.
+ :type key: str
- def __getitem__(self, key):
+ :return: The pythonized item at the key.
+ :rtype: ld_list
+ """
full_iri = self.ld_proc.expand_iri(self.active_ctx, key)
+ if full_iri not in self.data_dict:
+ self[full_iri] = []
ld_value = self.data_dict[full_iri]
return self._to_python(full_iri, ld_value)
- def __setitem__(self, key, value):
- full_iri = self.ld_proc.expand_iri(self.active_ctx, key)
- ld_value = self._to_expanded_json({full_iri: value})
+ def __setitem__(self: Self, key: str, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None:
+ """
+ Set the item at the given key to the given value or delete it if value is None.
+ The given value is expanded.
+
+ :param self: The ld_dict the item is set in.
+ :type self: ld_dict
+ :param key: The key at which the item is set.
+ :type key: str
+ :param value: The new value.
+ :type value: JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list
+
+ :return:
+ :rtype: None
+ """
+ # if the value is None delete the entry instead of updating it
+ if value is None:
+ del self[self.ld_proc.expand_iri(self.active_ctx, key)]
+ return
+ # expand the key, value pair and update data_dict
+ ld_value = self._to_expanded_json({key: value})
self.data_dict.update(ld_value)
- def __delitem__(self, key):
+ def __delitem__(self: Self, key: str) -> None:
+ """
+ Delete the key, value pair with the given value pair.
+    Note that if a deleted object is represented by an ld_container during this process it will still exist
+ and not be modified afterwards.
+
+ :param self: The ld_dict the key, value pair is deleted from.
+ :type self: ld_dict
+ :param key: The key (expanded or compacted) of the key, value pair that is deleted.
+ :type key: str
+
+ :return:
+ :rtype: None
+ """
+ # expand key and delete the key, value pair
full_iri = self.ld_proc.expand_iri(self.active_ctx, key)
del self.data_dict[full_iri]
- def __contains__(self, key):
+ def __contains__(self: Self, key: str) -> bool:
+ """
+ Returns whether or not self contains a key, value pair with the given key.
+
+    :param self: The ld_dict that is checked for whether it contains a key, value pair with the given key.
+ :type self: ld_dict
+ :param key: The key for which it is checked if a key, value pair is contained in self.
+ :type key: str
+
+ :return: Whether or not self contains a key, value pair with the given key.
+ :rtype: bool
+ """
+ # expand the key and check if self contains a key, value pair with it
full_iri = self.ld_proc.expand_iri(self.active_ctx, key)
+ # FIXME: is that good?
return full_iri in self.data_dict
- def __eq__(self, other):
+ def __eq__(
+ self: Self, other: Union[ld_dict, dict[str, Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]]
+ ) -> bool:
+ """
+    Returns whether or not self is considered to be equal to other.
+ If other is not an ld_dict, it is converted first.
+ If an id check is possible return its result otherwise:
+ For each key, value pair its value is compared to the value with the same key in other.
+    Note that due to those circumstances equality is not transitive,
+ meaning if a == b and b == c it is not guaranteed that a == c.
+
+ :param self: The ld_dict other is compared to.
+ :type self: ld_dict
+ :param other: The dict/ ld_dict self is compared to.
+ :type other: ld_dict | dict[str, JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]
+
+ :return: Whether or not self and other are considered equal.
+ If other is of the wrong type return the NotImplemented singleton instead.
+ :rtype: bool
+ """
+ # check if other has an acceptable type
if not isinstance(other, (dict, ld_dict)):
return NotImplemented
+
+ # compare in the special case that other is a json_id or json_value
if ld_container.is_json_id(other):
if "@id" in self:
return self["@id"] == other["@id"]
@@ -49,62 +186,147 @@ def __eq__(self, other):
if {*self.keys()}.issubset({"@id", *other.keys()}):
return ld_container.are_values_equal(self.data_dict, other)
return False
+
+ # convert into an ld_dict if other is not one
if isinstance(other, dict):
other = self.from_dict(other, parent=self.parent, key=self.key, context=self.context)
+
+ # check for id equality
if "@id" in self and "@id" in other:
return self["@id"] == other["@id"]
+
+ # test for value equality
keys_self = {*self.keys()}
keys_other = {*other.keys()}
unique_keys = keys_self.symmetric_difference(keys_other)
if unique_keys and unique_keys != {"@id"}:
+ # there is a key that isn't "@id" that is only in other or self
return False
+ # check if the values with the same key are equal
for key in keys_self.intersection(keys_other):
- item = self[key]
- other_item = other[key]
- res = item.__eq__(other_item)
- if res == NotImplemented:
- res = other_item.__eq__(item)
- if res is False or res == NotImplemented: # res is not True
+ if self[key] != other[key]:
return False
return True
- def __ne__(self, other):
+ def __ne__(
+ self: Self, other: Union[ld_dict, dict[str, Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]]
+ ) -> bool:
+ """
+ Returns whether or not self and other are not considered to be equal.
+ (Returns not self.__eq__(other) if the return type is bool.
+ See ld_list.__eq__ for more details on the comparison.)
+
+ :param self: The ld_dict other is compared to.
+ :type self: ld_dict
+ :param other: The dict/ ld_dict self is compared to.
+ :type other: ld_dict | dict[str, JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]
+
+ :return: Whether or not self and other are not considered equal.
+ If other is of the wrong type return the NotImplemented singleton instead.
+ :rtype: bool
+ """
+ # compare self and other using __eq__
x = self.__eq__(other)
+ # return NotImplemented if __eq__ did so and else the inverted result of __eq__
if x is NotImplemented:
return NotImplemented
return not x
- def get(self, key, default=_NO_DEFAULT):
+ def get(
+ self: Self, key: str, default: Any = _NO_DEFAULT
+ ) -> Union[ld_list, Any]:
+ """
+ Get the item with the given key in a pythonized form using the built-in get.
+ If a KeyError is raised, return the default or reraise it if no default is given.
+
+ :param self: The ld_dict the item is taken from.
+ :type self: ld_dict
+ :param key: The key (compacted or expanded) to the item.
+ :type key: str
+
+ :return: The pythonized item at the key.
+ :rtype: ld_list
+
+ :raises KeyError: If the built-in get raised a KeyError.
+ """
try:
- value = self[key]
- return value
+ return self[key]
except KeyError as e:
- if default is not ld_dict._NO_DEFAULT:
- return default
- raise e
+ if default is self._NO_DEFAULT:
+ raise e
+ return default
+
+ def update(
+ self: Self,
+ other: Union[ld_dict, dict[str, Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]]
+ ) -> None:
+ """
+ Set the items at the given keys to the given values, or delete them if the value is None, by using the built-in set.
+
+ :param self: The ld_dict the items are set in.
+ :type self: ld_dict
+ :param other: The key, value pairs giving the new values and their keys.
+ :type other: ld_dict | dict[str, JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]
- def update(self, other):
+ :return:
+ :rtype: None
+ """
for key, value in other.items():
self[key] = value
- def keys(self):
+ def keys(self: Self) -> KeysView[str]:
+ """
+ Return the keys of the key, value pairs of self.
+
+ :param self: The ld_dict whose keys are returned.
+ :type self: ld_dict
+ """
return self.data_dict.keys()
- def compact_keys(self):
+ def compact_keys(self: Self) -> Iterator[str]:
+ """
+ Return an iterator of the compacted keys of the key, value pairs of self.
+
+ :param self: The ld_dict whose compacted keys are returned.
+ :type self: ld_dict
+ """
return map(
lambda k: self.ld_proc.compact_iri(self.active_ctx, k),
self.data_dict.keys()
)
- def items(self):
+ def items(self: Self) -> Generator[tuple[str, ld_list], None, None]:
+ """
+ Return a generator of tuples of keys and their values in self.
+
+ :param self: The ld_dict whose items are returned.
+ :type self: ld_dict
+ """
for k in self.data_dict.keys():
yield k, self[k]
@property
- def ref(self):
+ def ref(self: Self) -> dict[Literal["@id"], str]:
+ """
+ Return the dict used to reference this object by its id. (Its form is {"@id": ...})
+
+ :param self: The ld_dict whose reference is returned.
+ :type self: ld_dict
+
+ :raises KeyError: If self has no id.
+ """
return {"@id": self.data_dict['@id']}
- def to_python(self):
+ def to_python(self: Self) -> dict[str, Union[BASIC_TYPE, TIME_TYPE, PYTHONIZED_LD_CONTAINER]]:
+ """
+ Return a fully pythonized version of this object where all ld_container are replaced by lists and dicts.
+
+ :param self: The ld_dict whose fully pythonized version is returned.
+ :type self: ld_dict
+
+ :return: The fully pythonized version of self.
+ :rtype: dict[str, BASIC_TYPE | TIME_TYPE | PYTHONIZED_LD_CONTAINER]
+ """
res = {}
for key in self.compact_keys():
value = self[key]
@@ -113,14 +335,42 @@ def to_python(self):
res[key] = value
return res
+ # FIXME: Allow from_dict to handle dicts containing ld_dicts and ld_lists
@classmethod
- def from_dict(cls, value, *, parent=None, key=None, context=None, ld_type=None):
- ld_data = value.copy()
+ def from_dict(
+ cls: type[Self],
+ value: dict[str, PYTHONIZED_LD_CONTAINER],
+ *,
+ parent: Union[ld_dict, ld_list, None] = None,
+ key: Union[str, None] = None,
+ context: Union[str, JSON_LD_CONTEXT_DICT, list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None,
+ ld_type: Union[str, list[str], None] = None
+ ) -> ld_dict:
+ """
+ Creates a ld_dict from the given dict with the given parent, key, context and ld_type.
+ Uses the expansion of the JSON-LD Processor and not the one of ld_container.
+
+ :param value: The dict of values the ld_dict should be created from.
+ :type value: dict[str, PYTHONIZED_LD_CONTAINER]
+ :param parent: The parent container of the new ld_dict.
+ :type parent: ld_dict | ld_list | None
+ :param key: The key into the inner most parent container representing a dict of the new ld_dict.
+ :type key: str | None
+ :param context: The context for the new dict (it will also inherit the context of parent).
+ :type context: str | JSON_LD_CONTEXT_DICT | list[str | JSON_LD_CONTEXT_DICT] | None
+ :param ld_type: Additional value(s) for the new dict.
+ :type ld_type: str | list[str] | None
+ :return: The new ld_dict built from value.
+ :rtype: ld_dict
+ """
+ # make a copy of value and add the new type to it.
+ ld_data = value.copy()
ld_type = ld_container.merge_to_list(ld_type or [], ld_data.get('@type', []))
if ld_type:
ld_data["@type"] = ld_type
+ # generate the context from value, context and parent
data_context = ld_data.pop('@context', [])
merged_contexts = ld_container.merge_to_list(data_context, context or [])
full_context = []
@@ -129,17 +379,40 @@ def from_dict(cls, value, *, parent=None, key=None, context=None, ld_type=None):
elif parent is not None:
full_context = parent.full_context + merged_contexts
+ # expand value and create an ld_dict from it
ld_value = cls.ld_proc.expand(ld_data, {"expandContext": full_context, "documentLoader": bundled_loader})
ld_value = cls(ld_value, parent=parent, key=key, context=merged_contexts)
return ld_value
@classmethod
- def is_ld_dict(cls, ld_value):
+ def is_ld_dict(cls: type[Self], ld_value: Any) -> bool:
+ """
+ Returns whether the given value is considered to be possible of representing an expanded json-ld dict.
+ I.e. if ld_value is a list containing a dict containing none of the keys "@set", "@graph", "@list" and "@value"
+ and not only the key "@id".
+
+ :param ld_value: The value that is checked.
+ :type ld_value: Any
+
+ :returns: Whether or not ld_value could represent an expanded json-ld dict.
+ :rtype: bool
+ """
return cls.is_ld_node(ld_value) and cls.is_json_dict(ld_value[0])
@classmethod
- def is_json_dict(cls, ld_value):
+ def is_json_dict(cls: type[Self], ld_value: Any) -> bool:
+ """
+ Returns whether the given value is considered to be possible of representing an expanded json-ld dict.
+ I.e. if ld_value is a dict containing none of the keys "@set", "@graph", "@list" and "@value"
+ and not only the key "@id".
+
+ :param ld_value: The value that is checked.
+ :type ld_value: Any
+
+ :returns: Whether or not ld_value could represent an expanded json-ld dict.
+ :rtype: bool
+ """
if not isinstance(ld_value, dict):
return False
diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py
index c4d1c450..23ebe4d0 100644
--- a/src/hermes/model/types/ld_list.py
+++ b/src/hermes/model/types/ld_list.py
@@ -5,19 +5,25 @@
# SPDX-FileContributor: Michael Meinel
# SPDX-FileContributor: Michael Fritzsche
+from __future__ import annotations
+
+from .ld_container import ld_container
from collections import deque
-from types import NotImplementedType
-from .ld_container import (
- ld_container,
- JSON_LD_CONTEXT_DICT,
- EXPANDED_JSON_LD_VALUE,
- PYTHONIZED_LD_CONTAINER,
- JSON_LD_VALUE,
- TIME_TYPE,
- BASIC_TYPE,
-)
-from typing import Generator, Hashable, Union, Self, Any
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+ from collections.abc import Generator, Hashable
+ from .ld_dict import ld_dict
+ from .ld_container import (
+ JSON_LD_CONTEXT_DICT,
+ EXPANDED_JSON_LD_VALUE,
+ PYTHONIZED_LD_CONTAINER,
+ JSON_LD_VALUE,
+ TIME_TYPE,
+ BASIC_TYPE,
+ )
+ from typing import Any, Union
+ from typing_extensions import Self
class ld_list(ld_container):
@@ -33,22 +39,22 @@ class ld_list(ld_container):
def __init__(
self: Self,
- data: Union[list[str], list[dict[str, EXPANDED_JSON_LD_VALUE]]],
+ data: EXPANDED_JSON_LD_VALUE,
*,
- parent: Union["ld_container", None] = None,
+ parent: Union[ld_dict, ld_list, None] = None,
key: Union[str, None] = None,
index: Union[int, None] = None,
context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None,
) -> None:
"""
- Create a new ld_list container.
+ Create a new instance of an ld_list.
:param self: The instance of ld_list to be initialized.
- :type self: Self
+ :type self: ld_list
:param data: The expanded json-ld data that is mapped (must be valid for @set, @list or @graph)
- :type data: list[str] | list[dict[str, BASIC_TYPE | EXPANDED_JSON_LD_VALUE]]
+ :type data: EXPANDED_JSON_LD_VALUE
:param parent: parent node of this container.
- :type parent: ld_container | None
+ :type parent: ld_dict | ld_list | None
:param key: key into the parent container.
:type key: str | None
:param index: index into the parent container.
@@ -95,17 +101,17 @@ def __init__(
def __getitem__(
self: Self, index: Union[int, slice]
- ) -> Union[BASIC_TYPE, TIME_TYPE, ld_container, list[Union[BASIC_TYPE, TIME_TYPE, ld_container]]]:
+ ) -> Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list, list[Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]]:
"""
Get the item(s) at position index in a pythonized form.
:param self: The ld_list the items are taken from.
- :type self: Self
+ :type self: ld_list
:param index: The positon(s) from which the item(s) is/ are taken.
:type index: int | slice
:return: The pythonized item(s) at index.
- :rtype: BASIC_TYPE | TIME_TYPE | ld_container | list[BASIC_TYPE | TIME_TYPE | ld_container]]
+ :rtype: BASIC_TYPE | TIME_TYPE | ld_dict | ld_list | list[BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]
"""
# handle slices by applying them to a list of indices and then getting the items at those
if isinstance(index, slice):
@@ -118,18 +124,18 @@ def __getitem__(
return item
def __setitem__(
- self: Self, index: Union[int, slice], value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]
+ self: Self, index: Union[int, slice], value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]
) -> None:
"""
Set the item(s) at position index to the given value(s).
All given values are expanded. If any are assimilated by self all items that would be added by this are added.
:param self: The ld_list the items are set in.
- :type self: Self
+ :type self: ld_list
:param index: The positon(s) at which the item(s) is/ are set.
:type index: int | slice
:param value: The new value(s).
- :type value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]
+ :type value: JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list
:return:
:rtype: None
@@ -161,7 +167,7 @@ def __delitem__(self: Self, index: Union[int, slice]) -> None:
and not be modified afterwards.
:param self: The ld_list the items are deleted from.
- :type self: Self
+ :type self: ld_list
:param index: The positon(s) at which the item(s) is/ are deleted.
:type index: int | slice
@@ -175,22 +181,22 @@ def __len__(self: Self) -> int:
Returns the number of items in this ld_list.
:param self: The ld_list whose length is to be returned.
- :type self: Self
+ :type self: ld_list
:return: The length of self.
:rtype: int
"""
return len(self.item_list)
- def __iter__(self: Self) -> Generator[Union[BASIC_TYPE | TIME_TYPE | ld_container], None, None]:
+ def __iter__(self: Self) -> Generator[Union[BASIC_TYPE, TIME_TYPE, ld_dict, ld_list], None, None]:
"""
Returns an iterator over the pythonized values contained in self.
:param self: The ld_list over whose items is iterated.
- :type self: Self
+ :type self: ld_list
:return: The Iterator over self's values.
- :rtype: Generator[Union[BASIC_TYPE | TIME_TYPE | ld_container], None, None]
+ :rtype: Generator[BASIC_TYPE | TIME_TYPE | ld_dict | ld_list, None, None]
"""
# return an Iterator over each value in self in its pythonized from
for index, value in enumerate(self.item_list):
@@ -211,7 +217,7 @@ def __contains__(self: Self, value: JSON_LD_VALUE) -> bool:
has the same @id like it or it xor the object in the item_list has an id an all other values are the same.
:param self: The ld_list that is checked if it contains value.
- :type self: Self
+ :type self: ld_list
:param value: The object being checked whether or not it is in self.
:type value: JSON_LD_VALUE
@@ -239,33 +245,29 @@ def __contains__(self: Self, value: JSON_LD_VALUE) -> bool:
def __eq__(
self: Self,
- other: Union[
- "ld_list",
- list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]],
- dict[str, list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]]],
- ],
- ) -> Union[bool, NotImplementedType]:
+ other: Union[ld_list, list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]],
+ dict[str, Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]]
+ ) -> bool:
"""
Returns wheter or not self is considered to be equal to other.
If other is not an ld_list, it is converted first.
For each index it is checked if the ids of the items at index in self and other match if both have one,
if only one has or neither have an id all other values are compared.
Note that due to those circumstances equality is not transitve
- meaning if a == b and b == c is is not guaranteed that a == c.
+ meaning if a == b and b == c it is not guaranteed that a == c.
If self or other is considered unordered the comparison is more difficult. All items in self are compared
with all items in other. On the resulting graph given by the realtion == the Hopcroft-Karp algoritm is used
to determine if there exists a bijection reordering self so that the ordered comparison of self with other
returns true.
:param self: The ld_list other is compared to.
- :type self: Self
+ :type self: ld_list
:param other: The list/ container/ ld_list self is compared to.
- :type other: ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_container]
- | dict[str, list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_container]]
+ :type other: ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]
:return: Whether or not self and other are considered equal.
- If other is of the wrong type return NotImplemented instead.
- :rtype: bool | NotImplementedType
+ If other is of the wrong type return the NotImplemented singleton instead.
+ :rtype: bool
"""
# check if other has an acceptable type
if not (isinstance(other, (list, ld_list)) or ld_list.is_container(other)):
@@ -312,16 +314,8 @@ def __eq__(
if item["@id"] != other_item["@id"]:
return False
continue
- # get the 'real' items (i.e. can also be ld_dicts or ld_lists)
- item = self[index]
- other_item = other[index]
- # compare using the correct equals method
- res = item.__eq__(other_item)
- if res == NotImplemented:
- # swap order if first try returned NotImplemented
- res = other_item.__eq__(item)
- # return false if the second comparison also fails or one of them returned false
- if res is False or res == NotImplemented:
+ # compare the 'real' items (i.e. can also be ld_dicts or ld_lists)
+ if self[index] != other[index]:
return False
# return true because no unequal elements where found
return True
@@ -341,16 +335,8 @@ def __eq__(
if item["@id"] == other_item["@id"]:
equality_pairs[index] += [other_index]
continue
- # get the 'real' items (i.e. can also be ld_dicts or ld_lists)
- item = self[index]
- other_item = other[index]
- # compare using the correct equals method
- res = item.__eq__(other_item)
- if res == NotImplemented:
- # swap order if first try returned NotImplemented
- res = other_item.__eq__(item)
- # if one of both comparisons returned true the elements are equal
- if res is not NotImplemented and res:
+ # compare the 'real' items (i.e. can also be ld_dicts or ld_lists)
+ if self[index] == other[other_index]:
equality_pairs[index] += [other_index]
if len(equality_pairs[index]) == 0:
# there exists no element in other that is equal to item
@@ -370,7 +356,10 @@ def __eq__(
@classmethod
def _bfs_step(
- cls: Self, verticies1: set[Hashable], edges: dict[Hashable, tuple[Hashable]], matches: dict[Hashable, Hashable],
+ cls: type[Self],
+ verticies1: set[Hashable],
+ edges: dict[Hashable, tuple[Hashable]],
+ matches: dict[Hashable, Hashable],
distances: dict[Hashable, Union[int, float]]
) -> bool:
"""
@@ -424,7 +413,10 @@ def _bfs_step(
@classmethod
def _dfs_step(
- cls: Self, ver: Hashable, edges: dict[Hashable, tuple[Hashable]], matches: dict[Hashable, Hashable],
+ cls: type[Self],
+ ver: Hashable,
+ edges: dict[Hashable, tuple[Hashable]],
+ matches: dict[Hashable, Hashable],
distances: dict[Hashable, Union[int, float]]
) -> bool:
"""
@@ -468,7 +460,10 @@ def _dfs_step(
@classmethod
def _hopcroft_karp(
- cls: Self, verticies1: set[Hashable], verticies2: set[Hashable], edges: dict[Hashable, tuple[Hashable]]
+ cls: type[Self],
+ verticies1: set[Hashable],
+ verticies2: set[Hashable],
+ edges: dict[Hashable, tuple[Hashable]]
) -> int:
"""
Implementation of Hopcroft-Karp. I.e.:
@@ -509,27 +504,21 @@ def _hopcroft_karp(
return matching_size
def __ne__(
- self: Self,
- other: Union[
- "ld_list",
- list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]],
- dict[str, list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]]],
- ],
- ) -> Union[bool, NotImplementedType]:
+ self: Self, other: Union[ld_list, list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]]
+ ) -> bool:
"""
Returns whether or not self and other not considered to be equal.
(Returns not self.__eq__(other) if the return type is bool.
See ld_list.__eq__ for more details on the comparison.)
:param self: The ld_list other is compared to.
- :type self: Self
+ :type self: ld_list
:param other: The list/ container/ ld_list self is compared to.
- :type other: ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_container]
- | dict[str, list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_container]]
+ :type other: ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]
:return: Whether or not self and other are not considered equal.
- If other is of the wrong type return NotImplemented instead.
- :rtype: bool | NotImplementedType
+ If other is of the wrong type return the NotImplemented singleton instead.
+ :rtype: bool
"""
# compare self and other using __eq__
x = self.__eq__(other)
@@ -538,30 +527,30 @@ def __ne__(
return NotImplemented
return not x
- def append(self: Self, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]) -> None:
+ def append(self: Self, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]) -> None:
"""
Append the item to the given ld_list self.
The given value is expanded. If it is assimilated by self all items that would be added by this are added.
:param self: The ld_list the item is appended to.
- :type self: Self
+ :type self: ld_list
:param value: The new value.
- :type value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]
+ :type value: JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list
:return:
:rtype: None
"""
self.item_list.extend(self._to_expanded_json([value]))
- def extend(self: Self, value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]]) -> None:
+ def extend(self: Self, value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_dict, ld_list]]) -> None:
"""
Append the items in value to the given ld_list self.
The given values are expanded. If any are assimilated by self all items that would be added by this are added.
:param self: The ld_list the items are appended to.
- :type self: Self
+ :type self: ld_list
:param value: The new values.
- :type value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]]
+ :type value: list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_dict | ld_list]
:return:
:rtype: None
@@ -569,15 +558,15 @@ def extend(self: Self, value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, l
for item in value:
self.append(item)
- def to_python(self: Self) -> list[PYTHONIZED_LD_CONTAINER]:
+ def to_python(self: Self) -> list[Union[BASIC_TYPE, TIME_TYPE, PYTHONIZED_LD_CONTAINER]]:
"""
Return a fully pythonized version of this object where all ld_container are replaced by lists and dicts.
:param self: The ld_list whose fully pythonized version is returned.
- :type self: Self
+ :type self: ld_list
:return: The fully pythonized version of self.
- :rtype: list[PYTHONIZED_LD_CONTAINER]
+ :rtype: list[BASIC_TYPE | TIME_TYPE | PYTHONIZED_LD_CONTAINER]
"""
return [
item.to_python() if isinstance(item, ld_container) else item
@@ -621,11 +610,11 @@ def from_list(
cls: type[Self],
value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE]],
*,
- parent: Union[ld_container, None] = None,
+ parent: Union[ld_dict, ld_list, None] = None,
key: Union[str, None] = None,
context: Union[str, JSON_LD_CONTEXT_DICT, list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None,
container_type: str = "@set"
- ) -> "ld_list":
+ ) -> ld_list:
"""
Creates a ld_list from the given list with the given parent, key, context and container_type.
Note that only container_type '@set' is valid for key '@type'.
@@ -636,10 +625,10 @@ def from_list(
:type value: list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE]
:param parent: The parent container of the new ld_list.
If value is assimilated by parent druing JSON-LD
expansion parent is extended by value and parent is returned.
- :type parent: ld_container | None
+ :type parent: ld_dict | ld_list | None
:param key: The key into the inner most parent container representing a dict of the new ld_list.
:type: key: str | None
- :param context: The context for the new list (is will also inherit the context of parent).
+ :param context: The context for the new list (it will also inherit the context of parent).
Note that this context won't be added to parent if value is assimilated by parent and parent is returned.
:type context: str | JSON_LD_CONTEXT_DICT | list[str | JSON_LD_CONTEXT_DICT] | None
:param container_type: The container type of the new list valid are '@set', '@list' and '@graph'.
diff --git a/src/hermes/model/types/pyld_util.py b/src/hermes/model/types/pyld_util.py
index 3cf9862b..8bcef253 100644
--- a/src/hermes/model/types/pyld_util.py
+++ b/src/hermes/model/types/pyld_util.py
@@ -103,8 +103,11 @@ class JsonLdProcessor(jsonld.JsonLdProcessor):
def expand_iri(self, active_ctx: t.Any, short_iri: str, vocab: bool = True) -> str:
return self._expand_iri(active_ctx, short_iri, vocab=vocab)
- def compact_iri(self, active_ctx: t.Any, long_iri: str, vocab: bool = True) -> str:
- return self._compact_iri(active_ctx, long_iri, vocab=vocab)
+ def compact_iri(self, active_ctx: t.Any, long_iri: str, vocab: bool = True, value: dict = None) -> str:
+ for mapping in active_ctx["mappings"].values():
+ if "@container" in mapping and long_iri:
+ value = {x: "none" for x in mapping["@container"]}
+ return self._compact_iri(active_ctx, long_iri, vocab=vocab, value=value)
def initial_ctx(self, local_ctx, options=None):
return self.process_context(self._INITIAL_CONTEXT, local_ctx, options or {})
diff --git a/test/hermes_test/model/test_api.py b/test/hermes_test/model/test_api.py
new file mode 100644
index 00000000..b4bec276
--- /dev/null
+++ b/test/hermes_test/model/test_api.py
@@ -0,0 +1,136 @@
+import pytest
+
+from hermes.model import SoftwareMetadata
+from hermes.model.types import ld_list, ld_dict
+
+from hermes.model.types.ld_context import ALL_CONTEXTS
+
+EXTRA_VOCABS = {"foo": "https://bar.net/schema"}
+
+
+@pytest.fixture
+def default_context():
+ return {"@context": ALL_CONTEXTS}
+
+
+@pytest.fixture
+def custom_context():
+ return {"@context": ALL_CONTEXTS + [EXTRA_VOCABS]}
+
+
+@pytest.mark.parametrize("metadata,full_context", [
+ (SoftwareMetadata(), "default_context"),
+ (SoftwareMetadata(extra_vocabs=EXTRA_VOCABS), "custom_context"),
+])
+def test_init_no_data(metadata, full_context, request):
+ assert metadata.full_context == request.getfixturevalue(full_context)["@context"]
+
+
+@pytest.mark.parametrize("metadata,full_context", [
+ (SoftwareMetadata({"funding": "foo"}), "default_context"),
+ (SoftwareMetadata({"funding": "foo"}, extra_vocabs=EXTRA_VOCABS), "custom_context"),
+])
+def test_init_with_data(metadata, full_context, request):
+ assert metadata.full_context == request.getfixturevalue(full_context)["@context"]
+ assert metadata["funding"][0] == "foo"
+
+
+def test_init_nested_object():
+ my_software = {
+ "schema:softwareName": "MySoftware",
+ "foo:egg": "spam",
+ "foo:ham": "eggs",
+ "maintainer": {"name": "Some Name", "email": "maintainer@example.com"},
+ "author": [{"name": "Foo"}, {"name": "Bar"}],
+ }
+ data = SoftwareMetadata(my_software, extra_vocabs={"foo": "https://foo.bar"})
+ assert data["schema:softwareName"] == ["MySoftware"]
+ assert len(data["maintainer"]) == 1 and data["maintainer"][0]["name"] == ["Some Name"]
+ for author in data["author"]:
+ for name in author["name"]:
+ assert name in ["Foo", "Bar"]
+
+
+def test_append():
+ data = SoftwareMetadata()
+ data["schema:foo"].append("a")
+ assert type(data["schema:foo"]) is ld_list
+ assert data["schema:foo"][0] == "a" and data["schema:foo"].item_list == [{"@value": "a"}]
+ data["schema:foo"].append("b")
+ assert type(data["schema:foo"]) is ld_list
+ assert data["schema:foo"] == [{"@value": "a"}, {"@value": "b"}]
+ data["schema:foo"].append("c")
+ assert data["schema:foo"] == [{"@value": "a"}, {"@value": "b"}, {"@value": "c"}]
+ data = SoftwareMetadata()
+ data["schema:foo"].append({"schema:name": "bar"})
+ assert type(data["schema:foo"]) is ld_list and type(data["schema:foo"][0]) is ld_dict
+ assert data["schema:foo"] == [{"http://schema.org/name": [{"@value": "bar"}]}]
+ data["schema:foo"].append({"schema:name": "bar"})
+ assert type(data["schema:foo"]) is ld_list
+ assert data["schema:foo"] == 2 * [{"http://schema.org/name": [{"@value": "bar"}]}]
+ data["schema:foo"].append({"schema:name": "bar"})
+ assert data["schema:foo"] == 3 * [{"http://schema.org/name": [{"@value": "bar"}]}]
+
+
+def test_iterative_assignment():
+ # This tests iterative assignments/traversals to edit/appending values
+ data = SoftwareMetadata(extra_vocabs={"foo": "https://foo.bar"})
+ data["author"] = {"name": "Foo"}
+ # Look, a squirrel!
+ authors = data["author"]
+ assert isinstance(authors, ld_list)
+ author1 = authors[0]
+ author1["email"] = "author@example.com"
+ authors.append({"name": "Bar", "email": "author2@example.com"})
+ assert len(authors) == 2
+ del authors[0]
+ assert len(authors) == 1
+
+
+def test_usage():
+ data = SoftwareMetadata()
+ data["author"] = {"name": "Foo"}
+ data["author"].append({"name": "Bar"})
+ data["author"][0]["email"] = "foo@bar.net"
+ data["author"][0]["email"].append("foo@baz.com")
+ assert len(data["author"]) == 2
+ assert len(data["author"][0]["email"]) == 2
+ assert len(data["author"][1]["email"]) == 0
+ harvest = {
+ "authors": [
+ {"name": "Foo", "affiliation": ["Uni A", "Lab B"], "kw": ["a", "b", "c"]},
+ {"name": "Bar", "affiliation": ["Uni C"], "email": "bar@c.edu", "kw": "egg"},
+ {"name": "Baz", "affiliation": ["Lab E"]},
+ ]
+ }
+ for author in harvest["authors"]:
+ for exist_author in data["author"]:
+ if author["name"] in exist_author["name"]:
+ exist_author["affiliation"] = author.get("affiliation", [])
+ exist_author["email"].extend(email if isinstance((email := author.get("email", [])), list) else [email])
+ exist_author["schema:knowsAbout"].extend(kw if isinstance((kw := author.get("kw", [])), list) else [kw])
+ break
+ else:
+ data["author"].append(author)
+ assert len(data["author"]) == 3
+ foo, bar, baz = data["author"]
+ assert foo["name"][0] == "Foo"
+ assert foo["affiliation"] == ["Uni A", "Lab B"]
+ assert foo["schema:knowsAbout"] == ["a", "b", "c"]
+ assert foo["email"] == ["foo@bar.net", "foo@baz.com"]
+ assert bar["name"][0] == "Bar"
+ assert bar["affiliation"] == ["Uni C"]
+ assert bar["email"] == ["bar@c.edu"]
+ assert baz["name"][0] == "Baz"
+ assert baz["affiliation"] == ["Lab E"]
+ assert len(baz["schema:knowsAbout"]) == 0
+ assert len(baz["email"]) == 0
+ for author in data["author"]:
+ assert "name" in author
+ assert "email" in author
+ if author["schema:knowsAbout"] == ["egg"]:
+ # FIXME: None has to be discussed
+ # json-ld processor just removes it in expansion
+ author["schema:knowsAbout"] = None
+ author["schema:pronouns"] = "they/them"
+ assert len(bar["schema:knowsAbout"]) == 0
diff --git a/test/hermes_test/model/types/test_ld_dict.py b/test/hermes_test/model/types/test_ld_dict.py
index 545b704f..66095295 100644
--- a/test/hermes_test/model/types/test_ld_dict.py
+++ b/test/hermes_test/model/types/test_ld_dict.py
@@ -189,9 +189,10 @@ def test_get():
context=[{"schema": "https://schema.org/"}])
assert di.get("https://schema.org/name") == ["Manu Sporny"]
assert di.get("schema:name") == ["Manu Sporny"]
- assert di.get("bar", None) is None
+ assert di.get("bar", None) is None # invalid key
with pytest.raises(KeyError):
di.get("bar")
+ assert isinstance(di.get("schema:bar", None), ld_list) and len(di.get("schema:bar", None)) == 0
def test_update():