Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
7b86083
connect to calculations and systems through tasks
JFRudzinski Apr 9, 2025
0e91b8e
Create section run
ladinesa Apr 29, 2025
57e491f
added calculation and method
JFRudzinski Apr 30, 2025
657d680
remove logger debug warning
JFRudzinski Apr 30, 2025
c27fd1d
Add run sub-sections
ladinesa May 6, 2025
1f6d761
check for geom-opt
JFRudzinski May 7, 2025
88bca74
debugging tests
JFRudzinski May 8, 2025
9ea8ebd
fixed test, filter for SinglePoints
JFRudzinski May 8, 2025
0edcc01
fixed missing workflow2 section
JFRudzinski May 8, 2025
17fc14c
new structure with GO as input
JFRudzinski May 21, 2025
cb3dc2b
check and fill task inputs
JFRudzinski May 21, 2025
d9a05c6
added proxy vs. non-proxy support for system section, debugging comme…
JFRudzinski May 21, 2025
085ef61
improved comparison of m_proxy_value in case of no proxy
JFRudzinski May 21, 2025
c8cf76d
some reorg and simplification
JFRudzinski May 21, 2025
868f62b
save before refactor for non-proxy
JFRudzinski May 30, 2025
cb233fa
refactored for non-proxy, some unit test, have not tested yet
JFRudzinski May 30, 2025
393f26f
extend for non-proxy sections
JFRudzinski Jun 3, 2025
cee83c5
non-proxy compatibility for identifying system, but not comparing tas…
JFRudzinski Jun 3, 2025
f840c0f
simplified approach + clean
JFRudzinski Jun 4, 2025
7c7dde6
revert to full paths in yaml
JFRudzinski Jun 6, 2025
3771792
reinsert old test
JFRudzinski Jun 6, 2025
28a1b0b
revert shortcut sections implementation and add safetly guards for ro…
JFRudzinski Jun 9, 2025
a6d126f
working test without check for SinglePoints
JFRudzinski Jun 9, 2025
37a6af1
add back failing code
JFRudzinski Jun 9, 2025
bc0d77f
working version with helper function for global path from proxy/non-p…
JFRudzinski Jun 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
187 changes: 184 additions & 3 deletions simulationworkflowschema/equation_of_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from nomad.atomutils import get_volume
from nomad.datamodel.data import ArchiveSection
from nomad.units import ureg
from nomad.metainfo import SubSection, Section, Quantity
from nomad.metainfo import SubSection, Section, Quantity, MProxy
from nomad.datamodel.metainfo.workflow import Link
from .general import (
SimulationWorkflowMethod,
Expand All @@ -30,6 +30,9 @@
WORKFLOW_METHOD_NAME,
WORKFLOW_RESULTS_NAME,
)
from .single_point import SinglePoint
from runschema.run import Run, Program
from runschema.system import System


class EquationOfStateMethod(SimulationWorkflowMethod):
Expand Down Expand Up @@ -147,9 +150,110 @@ class EquationOfState(ParallelSimulation):

results = SubSection(sub_section=EquationOfStateResults)

# ! For default path code in normalize
# def __init__(self, *args, **kwargs):
# super().__init__(*args, **kwargs)
# self.default_archive_paths = {
# 'input': 'run/0/system/-1',
# 'task': 'workflow2',
# }

# def get_default_archive_path(self, raw_proxy_value, section_type='') -> str:
# """
# Returns a certain archive path if the raw proxy value points to the root of the archive.
# """
# if raw_proxy_value is None:
# return ''

# if '#/' in raw_proxy_value:
# _, after = raw_proxy_value.split('#/', 1)
# if after:
# return ''
# else:
# return self.default_archive_paths.get(section_type, '')
# else:
# return ''

def get_section_global_path(self, section: ArchiveSection) -> 'str | None':
    """
    Returns the global path of a section in the archive.

    Args:
        section: Either an unresolved `MProxy` reference or a regular
            (non-proxy) section.

    Returns:
        For an `MProxy`, its `m_proxy_value` unchanged. Otherwise a reference
        string of the form `../upload/archive/{entry_id}#{path}`, built from
        the `entry_id` found in the metadata of the section's root and from
        `section.m_path()`. `None` if the root has no (non-empty) `metadata`,
        or if either the entry id or the path is empty.
    """
    if isinstance(section, MProxy):
        return section.m_proxy_value

    archive_root = section.m_root()
    # Use getattr so that a root without a `metadata` attribute (presumably a
    # section tree not attached to an entry archive -- TODO confirm) yields
    # None instead of raising AttributeError inside the normalizer.
    archive_metadata = getattr(archive_root, 'metadata', None)
    if not archive_metadata:
        return None

    entry_id = archive_metadata.entry_id
    path = section.m_path()
    if not entry_id or not path:
        return None
    return f'../upload/archive/{entry_id}#{path}'

def normalize(self, archive, logger):
super().normalize(archive, logger)

flag_input_structure = False
input_path_global = ''
input_archive_root = None
# find the input structure
if self.inputs:
for input_item in self.inputs:
# if isinstance(input_item.section, MProxy):
# input_path_global = input_item.section.m_proxy_value
input_path_global = self.get_section_global_path(input_item.section)
input_section = input_item.section.m_resolved()

# ! For replacing short-hand sections with standardized paths
# default_path = self.get_default_archive_path(
# input_path_global, section_type='input'
# )
# logger.warning(f'default_path: {default_path}')
# if default_path != '':
# archive_root = archive.m_context.resolve_archive(input_path_global)
# input_section = archive_root.m_resolve(default_path)
if not isinstance(input_section, System):
continue

flag_input_structure = True
# ! No longer needed
# archive_root = (
# archive.m_context.resolve_archive(input_path_global)
# if input_path_global
# else archive_root
# )
# ! Goes with default path code above
# input_proxy_value = input_path_global + default_path
system_index = input_section.m_parent_index
Comment thread
JFRudzinski marked this conversation as resolved.
run_section = input_section.m_parent
run_index = run_section.m_parent_index
input_name = input_item.name
if input_archive_root:
if system_index == -1:
system_index = len(input_archive_root.run[run_index].system) - 1
if not archive.run:
run = Run(program=Program())
try:
run.system.extend([input_section])
run.method.extend(input_archive_root.run[run_index].method)
for calc in input_archive_root.run[run_index].calculation:
if calc.system_ref.m_parent_index == system_index:
run.calculation.extend([calc])
break
except Exception:
logger.warning(
'Failed to create run section from input structure. '
)

archive.run.append(run)

break

if not flag_input_structure:
logger.warning('No input structure found in EOS workflow normalizer.')

if not self.method:
self.method = EquationOfStateMethod()
self.inputs.append(Link(name=WORKFLOW_METHOD_NAME, section=self.method))
Expand All @@ -158,9 +262,65 @@ def normalize(self, archive, logger):
self.results = EquationOfStateResults()
self.outputs.append(Link(name=WORKFLOW_RESULTS_NAME, section=self.results))

if not self._calculations:
#! Causing test to fail
try:
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ladinesa

I've adjusted the code now so that the test passes without the code within this try statement. When I include this check for single point workflows I get:

ERROR nomad.normalizing:metainfo.py:39 {"event": "could not normalize section", "exception": "Traceback (most recent call last):\n File "/home/jfrudzinski/work/soft/nomad-distro-dev-run-schema-2025-04/packages/nomad-FAIR/nomad/normalizing/metainfo.py", line 37, in normalize_section\n normalize(archive, logger)\n File "/home/jfrudzinski/work/soft/nomad-distro-dev-run-schema-2025-04/packages/nomad-FAIR/nomad/datamodel/datamodel.py", line 1233, in normalize\n if not archive.metadata.entry_type:\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^\nAttributeError: 'NoneType' object has no attribute 'entry_type'", "normalizer": "MetainfoNormalizer", "section": "EntryArchive", "timestamp": "2025-06-09 16:37.36"}

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought I already fixed this months ago. I put a return if metadata is None; maybe it was reverted accidentally. I will fix it.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🙏 🙏

task_archives = [task.task.m_root() for task in self.tasks]
assert all(
isinstance(task_archive.workflow2, SinglePoint)
for task_archive in task_archives
)
except Exception:
logger.warning(
'Not all tasks are SinglePoints or failed to retrieve task archives. EOS workflow may be incomplete or incorrect.'
)
Comment thread
JFRudzinski marked this conversation as resolved.
return

for task in self.tasks:
# TODO - I need an alternative method to get the full input section path
# ! m_proxy_value is not available for "normal sections"
# logger.warning(f'task: {task.task}')
# logger.warning(f'task.section: {task.task.section}')
# raw_proxy_value = task.section.m_proxy_value
# default_path = self.get_default_archive_path(
# raw_proxy_value, section_type='task'
# )
# if default_path:
# # replace the task section with the default for tasks
# # task.section = task.section.m_xpath(default_path)
# archive_root = archive.m_context.resolve_archive(raw_proxy_value)
# task.section = archive_root.m_resolve(default_path)

# TODO - Add global output to each task output?

if input_path_global:
# input_proxy_values = [
# input.section.m_proxy_value for input in task.inputs
# ]
input_proxy_values = [
self.get_section_global_path(input.section) for input in task.inputs
]
if input_path_global in input_proxy_values:
index = input_proxy_values.index(input_path_global)
task.inputs[index].name = input_name
else:
task.inputs.append(Link(name=input_name, section=input_section))

if not self._calculations:
# try to get calculations from tasks (in case of instantiation from workflow yaml)
try:
self._calculations = [
task.task.results.calculations_ref[0] for task in self.tasks
]
except Exception:
pass

if not self._systems:
# try to get systems from calculations (in case of instantiation from workflow yaml)
try:
self._systems = [calc.system_ref for calc in self._calculations]
except Exception:
pass

if self.results.energies is None:
try:
self.results.energies = [
Expand Down Expand Up @@ -210,4 +370,25 @@ def normalize(self, archive, logger):
)
self.results.eos_fit.append(eos_fit)
except Exception:
self.logger.warning('EOS fit not succesful.')
logger.warning('EOS fit not succesful.')

# @staticmethod
# def archive_path_to_jmespath(path: str) -> str:
# """
# Converts an archive path like 'run/0/system/-1' to a jmespath like 'run[0].system[-1]'.
# """
# if not path:
# return ''
# parts = path.strip('/').split('/')
# jmes = []
# i = 0
# while i < len(parts):
# part = parts[i]
# # If next part is an integer, treat as index
# if i + 1 < len(parts) and parts[i + 1].lstrip('-').isdigit():
# jmes.append(f"{part}[{parts[i + 1]}]")
# i += 2
# else:
# jmes.append(part)
# i += 1
# return '.'.join(jmes)
2 changes: 2 additions & 0 deletions tests/test_simulationworkflowschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,8 @@ def parse_trajectory(filename):
return archive


# ! This parser seems to have very strange behavior; none of the basic m_xxx
# ! attributes are defined for the section refs of the workflow
def test_eos_workflow():
archive = parse_trajectory('tests/data/ase/Cu.traj')

Expand Down
Loading