From 836d76c5b351156a87c943ecc51a42687ecf200f Mon Sep 17 00:00:00 2001 From: YagmurSimsekk Date: Sun, 24 Aug 2025 16:50:48 +0200 Subject: [PATCH 1/7] Add SBML parser with BioModels integration - Implement SBML Level 2/3 parsing with units handling - Include boundary species filtering for ODE models - Add BioModels Database API integration for model download from CLI - Create structured data export (CSV, JSON, NPZ formats) - Add CLI commands for parsing and BioModels database access --- .gitattributes | 1 + LICENCE.txt => LICENSE.txt | 0 pyproject.toml | 2 +- requirements.txt | 2 + simba_ml/__init__.py | 3 + simba_ml/_version.py | 234 +++++---- simba_ml/cli/__main__.py | 4 + simba_ml/cli/biomodels.py | 153 ++++++ simba_ml/cli/parse_sbml.py | 170 +++++++ simba_ml/sbml_parser/__init__.py | 16 + simba_ml/sbml_parser/biomodels_api.py | 159 ++++++ simba_ml/sbml_parser/level_2/parser.py | 262 ++++++++++ simba_ml/sbml_parser/level_3/parser.py | 395 +++++++++++++++ simba_ml/sbml_parser/main_parser.py | 163 +++++++ simba_ml/sbml_parser/ml_exporter.py | 637 +++++++++++++++++++++++++ tests/sbml_parser/test_main_parser.py | 315 ++++++++++++ 16 files changed, 2392 insertions(+), 124 deletions(-) create mode 100644 .gitattributes rename LICENCE.txt => LICENSE.txt (100%) create mode 100644 simba_ml/cli/biomodels.py create mode 100644 simba_ml/cli/parse_sbml.py create mode 100644 simba_ml/sbml_parser/__init__.py create mode 100644 simba_ml/sbml_parser/biomodels_api.py create mode 100644 simba_ml/sbml_parser/level_2/parser.py create mode 100644 simba_ml/sbml_parser/level_3/parser.py create mode 100644 simba_ml/sbml_parser/main_parser.py create mode 100644 simba_ml/sbml_parser/ml_exporter.py create mode 100644 tests/sbml_parser/test_main_parser.py diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..97c1fff --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +simba_ml/_version.py export-subst diff --git a/LICENCE.txt b/LICENSE.txt similarity index 100% rename 
from LICENCE.txt rename to LICENSE.txt diff --git a/pyproject.toml b/pyproject.toml index e1b7664..139bf57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,3 @@ [build-system] -requires = ["setuptools>=42", "wheel==0.30.0", "versioneer-518"] +requires = ["setuptools>=42", "wheel", "versioneer-518"] build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index a40af70..a336bb9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,5 @@ scikit-learn>=1.1.3 dacite>=1.8.0 wandb>=0.13.9 click==8.1.3 +tomli>=2.0.0 # required by Python <3.11 +libsbml==5.20.5 diff --git a/simba_ml/__init__.py b/simba_ml/__init__.py index b0b3f4d..9526992 100644 --- a/simba_ml/__init__.py +++ b/simba_ml/__init__.py @@ -3,3 +3,6 @@ from simba_ml import _version __version__ = _version.get_versions()["version"] # type: ignore[no-untyped-call] + +from . import _version +__version__ = _version.get_versions()['version'] diff --git a/simba_ml/_version.py b/simba_ml/_version.py index 8bb7582..e15909b 100644 --- a/simba_ml/_version.py +++ b/simba_ml/_version.py @@ -1,4 +1,3 @@ -# mypy: ignore-errors # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag @@ -7,7 +6,7 @@ # that just contains the computed version number. # This file is released into the public domain. -# Generated by versioneer-0.28 +# Generated by versioneer-0.29 # https://github.com/python-versioneer/python-versioneer """Git implementation of _version.py.""" @@ -17,11 +16,11 @@ import re import subprocess import sys -from typing import Callable, Dict +from typing import Any, Callable, Dict, List, Optional, Tuple import functools -def get_keywords(): +def get_keywords() -> Dict[str, str]: """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. 
# setup.py/versioneer.py will grep for the variable names, so they must @@ -37,8 +36,15 @@ def get_keywords(): class VersioneerConfig: """Container for Versioneer configuration parameters.""" + VCS: str + style: str + tag_prefix: str + parentdir_prefix: str + versionfile_source: str + verbose: bool -def get_config(): + +def get_config() -> VersioneerConfig: """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py @@ -60,25 +66,30 @@ class NotThisMethod(Exception): HANDLERS: Dict[str, Dict[str, Callable]] = {} -def register_vcs_handler(vcs, method): # decorator +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator """Create decorator to mark a method as the handler of a VCS.""" - - def decorate(f): + def decorate(f: Callable) -> Callable: """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f - return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): +def run_command( + commands: List[str], + args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: """Call the given command(s).""" assert isinstance(commands, list) process = None - popen_kwargs = {} + popen_kwargs: Dict[str, Any] = {} if sys.platform == "win32": # This hides the console window if pythonw.exe is used startupinfo = subprocess.STARTUPINFO() @@ -89,17 +100,12 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env= try: dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen( - [command] + args, - cwd=cwd, - env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr else None), - **popen_kwargs, - ) + process = subprocess.Popen([command] + args, cwd=cwd, 
env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) break - except OSError: - e = sys.exc_info()[1] + except OSError as e: if e.errno == errno.ENOENT: continue if verbose: @@ -119,7 +125,11 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env= return stdout, process.returncode -def versions_from_parentdir(parentdir_prefix, root, verbose): +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both @@ -131,32 +141,26 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): - return { - "version": dirname[len(parentdir_prefix) :], - "full-revisionid": None, - "dirty": False, - "error": None, - "date": None, - } + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: - print( - "Tried directories %s but none started with prefix %s" - % (str(rootdirs), parentdir_prefix) - ) + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. 
- keywords = {} + keywords: Dict[str, str] = {} try: with open(versionfile_abs, "r") as fobj: for line in fobj: @@ -178,7 +182,11 @@ def git_get_keywords(versionfile_abs): @register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: """Get version information from git keywords.""" if "refnames" not in keywords: raise NotThisMethod("Short version file found") @@ -204,7 +212,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -213,7 +221,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r"\d", r)} + tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -221,35 +229,33 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): for ref in sorted(tags): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): - r = ref[len(tag_prefix) :] + r = ref[len(tag_prefix):] # Filter out refs that exactly match prefix or that don't start # with a number once the prefix is stripped (mostly a concern # when prefix is '') - if not re.match(r"\d", r): + if not re.match(r'\d', r): continue if verbose: print("picking %s" % r) - return { - "version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": None, - "date": date, - } + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") - return { - "version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": "no suitable tags", - "date": None, - } + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): +def git_pieces_from_vcs( + tag_prefix: str, + root: str, + verbose: bool, + runner: Callable = run_command +) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. 
This only gets called if the git-archive 'subst' keywords were *not* @@ -267,7 +273,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): env.pop("GIT_DIR", None) runner = functools.partial(runner, env=env) - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose) + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -275,19 +282,10 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner( - GITS, - [ - "describe", - "--tags", - "--dirty", - "--always", - "--long", - "--match", - f"{tag_prefix}[[:digit:]]*", - ], - cwd=root, - ) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -297,12 +295,13 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() - pieces = {} + pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) # --abbrev-ref was added in git-1.6.3 if rc != 0 or branch_name is None: raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") @@ -342,16 +341,17 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: - git_describe = git_describe[: git_describe.rindex("-dirty")] + git_describe = 
git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX - mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparsable. Maybe git-describe is misbehaving? - pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out + pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) return pieces # tag @@ -360,12 +360,10 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, - ) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix) :] + pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) @@ -389,14 +387,14 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): return pieces -def plus_or_dot(pieces): +def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" -def render_pep440(pieces): +def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . 
Note that if you @@ -414,13 +412,14 @@ def render_pep440(pieces): rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered -def render_pep440_branch(pieces): +def render_pep440_branch(pieces: Dict[str, Any]) -> str: """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . The ".dev0" means not master branch. Note that .dev0 sorts backwards @@ -443,13 +442,14 @@ def render_pep440_branch(pieces): rendered = "0" if pieces["branch"] != "master": rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered -def pep440_split_post(ver): +def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: """Split pep440 version string at the post-release segment. Returns the release segments before the post-release and the @@ -459,7 +459,7 @@ def pep440_split_post(ver): return vc[0], int(vc[1] or 0) if len(vc) == 2 else None -def render_pep440_pre(pieces): +def render_pep440_pre(pieces: Dict[str, Any]) -> str: """TAG[.postN.devDISTANCE] -- No -dirty. Exceptions: @@ -483,7 +483,7 @@ def render_pep440_pre(pieces): return rendered -def render_pep440_post(pieces): +def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards @@ -510,7 +510,7 @@ def render_pep440_post(pieces): return rendered -def render_pep440_post_branch(pieces): +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . The ".dev0" means not master branch. @@ -539,7 +539,7 @@ def render_pep440_post_branch(pieces): return rendered -def render_pep440_old(pieces): +def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . 
The ".dev0" means dirty. @@ -561,7 +561,7 @@ def render_pep440_old(pieces): return rendered -def render_git_describe(pieces): +def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. @@ -581,7 +581,7 @@ def render_git_describe(pieces): return rendered -def render_git_describe_long(pieces): +def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. @@ -601,16 +601,14 @@ def render_git_describe_long(pieces): return rendered -def render(pieces, style): +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: - return { - "version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None, - } + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} if not style or style == "default": style = "pep440" # the default @@ -634,16 +632,12 @@ def render(pieces, style): else: raise ValueError("unknown style '%s'" % style) - return { - "version": rendered, - "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], - "error": None, - "date": pieces.get("date"), - } + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} -def get_versions(): +def get_versions() -> Dict[str, Any]: """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. 
Some @@ -654,7 +648,8 @@ def get_versions(): verbose = cfg.verbose try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) except NotThisMethod: pass @@ -663,16 +658,13 @@ def get_versions(): # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. - for _ in cfg.versionfile_source.split("/"): + for _ in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None, - } + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) @@ -686,10 +678,6 @@ def get_versions(): except NotThisMethod: pass - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", - "date": None, - } + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} diff --git a/simba_ml/cli/__main__.py b/simba_ml/cli/__main__.py index 0fe9348..78caa0e 100644 --- a/simba_ml/cli/__main__.py +++ b/simba_ml/cli/__main__.py @@ -4,6 +4,8 @@ from simba_ml.cli import generate_data from simba_ml.cli import start_prediction from simba_ml.cli.problem_viewer import run_problem_viewer +from simba_ml.cli import parse_sbml +from simba_ml.cli import biomodels @click.group() @@ -14,6 +16,8 @@ def main() -> None: main.add_command(generate_data.generate_data) main.add_command(start_prediction.start_prediction) main.add_command(run_problem_viewer.run_problem_viewer) +main.add_command(parse_sbml.parse_sbml) +main.add_command(biomodels.biomodels) if __name__ == 
# Click commands for searching, inspecting and downloading BioModels models.
# NOTE: click uses each command's docstring as its --help text, so command
# docstrings are kept short and implementation notes live in comments.
import click
from simba_ml.sbml_parser.biomodels_api import BioModelsAPI, download_biomodel, search_biomodels


def _abort(error):
    """Report *error* in red on stderr and abort the running command.

    Call this from inside an ``except`` block. The original exception is
    chained onto ``click.Abort`` so the cause is not lost while debugging.

    Raises:
        click.Abort: always.
    """
    click.echo(click.style(f"❌ Error: {error}", fg='red'), err=True)
    raise click.Abort() from error


@click.group()
def biomodels():
    """BioModels Database commands."""


@biomodels.command()
@click.argument("model_id", type=str)
@click.option("--output-dir", "-o", default="./biomodels_downloads", help="Output directory for downloaded model")
def download(model_id, output_dir):
    """Download an SBML model from BioModels Database."""
    try:
        file_path = download_biomodel(model_id, output_dir)
        click.echo(click.style(f"✅ Downloaded: {file_path}", fg='green'))
    except Exception as e:  # CLI boundary: report any failure and abort
        _abort(e)


@biomodels.command()
@click.argument("query", type=str)
@click.option("--limit", "-l", default=10, help="Maximum number of results")
@click.option("--detailed", "-d", is_flag=True, help="Show detailed information")
def search(query, limit, detailed):
    """Search for models in BioModels Database."""
    try:
        models = search_biomodels(query, limit)

        if not models:
            click.echo(f"No models found for query: {query}")
            return

        # The API client always asks the server for at least 10 hits, so a
        # smaller --limit has to be enforced on the client side. Slicing is a
        # no-op when the helper has already trimmed the list.
        if limit > 0:
            models = models[:limit]

        click.echo(click.style(f"Found {len(models)} models for '{query}':", fg='cyan', bold=True))
        click.echo()

        for i, model in enumerate(models, 1):
            model_id = model.get('id', 'unknown')
            name = model.get('name', 'No name available')

            click.echo(f"{i}. {click.style(model_id, fg='blue', bold=True)}")
            click.echo(f" {name}")

            if detailed:
                authors = model.get('submitter', 'Unknown authors')
                # "publication" may be present but null in the JSON payload.
                publication = model.get('publication') or {}
                pub_year = publication.get('year', 'Unknown year')

                click.echo(f" Authors: {authors}")
                click.echo(f" Year: {pub_year}")

                full_title = publication.get('title')
                if full_title:
                    # Keep the listing compact: show at most 100 characters.
                    title = full_title[:100] + ('...' if len(full_title) > 100 else '')
                    click.echo(f" Publication: {title}")

            click.echo()

    except Exception as e:  # CLI boundary: report any failure and abort
        _abort(e)


@biomodels.command()
@click.argument("model_id", type=str)
def info(model_id):
    """Get information about a specific model."""
    try:
        api = BioModelsAPI()

        # Fetch both metadata and the file listing before printing anything,
        # so a failing request does not leave a half-printed report.
        model_info = api.get_model_info(model_id)
        files_info = api.get_model_files(model_id)

        click.echo(click.style(f"Model Information: {model_id}", fg='cyan', bold=True))
        click.echo("=" * 50)

        name = model_info.get('name', 'No name available')
        click.echo(f"Name: {name}")

        publication = model_info.get('publication') or {}
        if publication:
            click.echo(f"Publication: {publication.get('title', 'No title')}")
            click.echo(f"Authors: {publication.get('authors', 'Unknown')}")
            click.echo(f"Year: {publication.get('year', 'Unknown')}")

        # Show available files
        click.echo()
        click.echo(click.style("Available Files:", fg='yellow', bold=True))

        main_files = files_info.get('main') or []
        if main_files:
            click.echo("Main files:")
            for f in main_files:
                size = f.get('fileSize', 'unknown size')
                # A file entry without a name is listed instead of aborting.
                click.echo(f" • {f.get('name', 'unnamed')} ({size} bytes)")

        additional_files = files_info.get('additional') or []
        if additional_files:
            click.echo("Additional files:")
            for f in additional_files[:5]:  # Show first 5
                size = f.get('fileSize', 'unknown size')
                desc = f.get('description', 'No description')
                click.echo(f" • {f.get('name', 'unnamed')} ({size} bytes) - {desc}")
            if len(additional_files) > 5:
                click.echo(f" ... and {len(additional_files) - 5} more files")

    except Exception as e:  # CLI boundary: report any failure and abort
        _abort(e)


@biomodels.command()
@click.argument("model_id", type=str)
@click.option("--output-dir", "-o", default="./biomodels_downloads", help="Output directory")
def download_and_parse(model_id, output_dir):
    """Download a model and immediately parse it."""
    try:
        # Download model
        file_path = download_biomodel(model_id, output_dir)
        click.echo(click.style(f"✅ Downloaded: {file_path}", fg='green'))

        # Parse model
        click.echo()
        click.echo("Parsing model...")

        # Imported here rather than at module level, as in the original code.
        from simba_ml.sbml_parser.main_parser import MainSBMLParser

        result = MainSBMLParser(file_path).process()

        # Named sbml_info so it does not shadow the sibling `info` command.
        sbml_info = result['sbml_info']
        click.echo(click.style(f"Model: {sbml_info['model_name']}", fg='blue', bold=True))
        click.echo(f"SBML Level: {sbml_info['level']}, Version: {sbml_info['version']}")
        click.echo(f"Species: {sbml_info['num_species']}")
        click.echo(f"Reactions: {sbml_info['num_reactions']}")
        click.echo(f"Parameters: {sbml_info['num_parameters']}")

        # A model is treated as ODE-ready when at least one reaction carries
        # a kinetic law.
        has_kinetic_laws = any(r.get('kinetic_law') is not None for r in result['reactions'])
        if has_kinetic_laws:
            click.echo(click.style("✅ ODE Ready: Model contains kinetic laws", fg='green'))
        else:
            click.echo(click.style("⚠️ No kinetic laws found", fg='yellow'))

    except Exception as e:  # CLI boundary: report any failure and abort
        _abort(e)
SBMLMLExporter + +@click.command() +@click.argument("file", type=click.Path(exists=True)) +@click.option("--verbose", "-v", is_flag=True, help="Show detailed parsing information") +@click.option("--species-limit", "-s", default=5, help="Number of species to display (default: 5)") +@click.option("--reactions-limit", "-r", default=5, help="Number of reactions to display (default: 5)") +@click.option("--export", "-e", type=click.Choice(['csv', 'json', 'npz']), help="Export ML-ready data in specified format") +@click.option("--output-dir", "-o", default="./sbml_ml_data", help="Output directory for exported data (default: ./sbml_ml_data)") +@click.option("--quiet", "-q", is_flag=True, help="Suppress visual output, only export data") +def parse_sbml(file, verbose, species_limit, reactions_limit, export, output_dir, quiet): + """Parse an SBML file and print a summary of the model.""" + try: + sbml_parser = MainSBMLParser(file) + result = sbml_parser.process() + + # If quiet mode and no export, just export the JSON to stdout and return + if quiet and not export: + exporter = SBMLMLExporter(result) + ml_dataset = exporter.get_ml_dataset() + click.echo(json.dumps(ml_dataset, indent=2, default=str)) + return + + # Print header (unless quiet mode) + if not quiet: + click.echo(click.style("=" * 60, fg='green')) + click.echo(click.style(f"SBML Model Parsing Results", fg='green', bold=True)) + click.echo(click.style("=" * 60, fg='green')) + click.echo() + + # Basic info + info = result['sbml_info'] + + if not quiet: + click.echo(click.style(f"📄 File:", fg='blue', bold=True) + f" {file}") + click.echo(click.style(f"📋 Model:", fg='blue', bold=True) + f" {info['model_name']} (ID: {info['model_id']})") + click.echo(click.style(f"🔢 SBML Level:", fg='blue', bold=True) + f" {info['level']}, Version: {info['version']}") + click.echo() + + # Statistics + if not quiet: + click.echo(click.style("📊 Model Statistics:", fg='cyan', bold=True)) + else: + click.echo(f"📊 Model Statistics:") + 
click.echo(f" • Species: {info['num_species']}") + click.echo(f" • Reactions: {info['num_reactions']}") + click.echo(f" • Parameters: {info['num_parameters']}") + click.echo(f" • Compartments: {info['num_compartments']}") + + if 'num_events' in info: + click.echo(f" • Events: {info['num_events']}") + if 'num_constraints' in info: + click.echo(f" • Constraints: {info['num_constraints']}") + click.echo() + + # ODE suitability check + has_kinetic_laws = any(r.get('kinetic_law') is not None for r in result['reactions']) + if result['reactions'] and not has_kinetic_laws: + click.echo(click.style("⚠️ Warning:", fg='yellow', bold=True) + " No kinetic laws found - this model may not be suitable for ODE simulation") + elif result['reactions'] and has_kinetic_laws: + click.echo(click.style("✅ ODE Ready:", fg='green', bold=True) + " Model contains kinetic laws suitable for ODE simulation") + click.echo() + + # Sample species + if result['species']: + click.echo(click.style(f"🧬 Sample Species (showing {min(species_limit, len(result['species']))}):", fg='magenta', bold=True)) + for i, species in enumerate(result['species'][:species_limit]): + boundary = " (boundary)" if species.get('boundary_condition') else "" + initial = "" + if species.get('initial_concentration') is not None: + initial = f" [C₀={species['initial_concentration']}]" + elif species.get('initial_amount') is not None: + initial = f" [A₀={species['initial_amount']}]" + click.echo(f" {i+1}. {species['id']} in {species['compartment']}{boundary}{initial}") + if len(result['species']) > species_limit: + click.echo(f" ... 
and {len(result['species']) - species_limit} more") + click.echo() + + # Sample reactions + if result['reactions']: + click.echo(click.style(f"⚗️ Sample Reactions (showing {min(reactions_limit, len(result['reactions']))}):", fg='red', bold=True)) + for i, reaction in enumerate(result['reactions'][:reactions_limit]): + reactants = " + ".join([f"{r['species']}" for r in reaction.get('reactants', [])]) + products = " + ".join([f"{p['species']}" for p in reaction.get('products', [])]) + reversible = " ⇌ " if reaction.get('reversible', False) else " → " + kinetic_info = " ✓" if reaction.get('kinetic_law') else " ✗" + click.echo(f" {i+1}. {reaction['id']}: {reactants}{reversible}{products}{kinetic_info}") + if len(result['reactions']) > reactions_limit: + click.echo(f" ... and {len(result['reactions']) - reactions_limit} more") + click.echo() + + # Compartments + if result['compartments']: + click.echo(click.style("🏠 Compartments:", fg='cyan', bold=True)) + for comp in result['compartments']: + size_info = f" (size: {comp['size']})" if comp.get('size') is not None else "" + click.echo(f" • {comp['id']}{size_info}") + click.echo() + + # Verbose output + if verbose: + click.echo(click.style("🔍 Detailed Information:", fg='white', bold=True)) + if info.get('notes'): + click.echo("Notes:") + click.echo(f" {info['notes'][:200]}{'...' 
if len(info['notes']) > 200 else ''}") + click.echo() + + # Unit information (Level 3) + if info.get('substance_units'): + click.echo(f"Substance Units: {info['substance_units']}") + if info.get('time_units'): + click.echo(f"Time Units: {info['time_units']}") + if info.get('volume_units'): + click.echo(f"Volume Units: {info['volume_units']}") + + # ML Data Export + if export: + if not quiet: + click.echo() + click.echo(click.style("🔬 Exporting data...", fg='cyan', bold=True)) + + try: + exporter = SBMLMLExporter(result) + exported_files = exporter.export_to_files(output_dir, format=export) + + if not quiet: + click.echo(click.style(f"📁 Data exported to: {output_dir}", fg='green')) + for data_type, file_path in exported_files.items(): + click.echo(f" • {data_type}: {os.path.basename(file_path)}") + + # Show some ML statistics + ml_dataset = exporter.get_ml_dataset() + click.echo() + click.echo(click.style("📊 ML Dataset Summary:", fg='cyan', bold=True)) + if 'matrices' in ml_dataset: + S = ml_dataset['matrices']['stoichiometry'] + A = ml_dataset['matrices']['adjacency'] + click.echo(f" • Stoichiometry matrix: {S.shape}") + click.echo(f" • Adjacency matrix: {A.shape}") + click.echo(f" • Network density: {(A.sum() / (A.shape[0] * A.shape[1]) * 100):.1f}%") + + if 'features' in ml_dataset: + features = ml_dataset['features'] + for feat_name, feat_array in features.items(): + click.echo(f" • {feat_name} features: {feat_array.shape}") + else: + # Quiet mode - just print file paths + for file_path in exported_files.values(): + click.echo(file_path) + + except Exception as e: + click.echo(click.style(f"❌ Export Error: {e}", fg='red'), err=True) + raise click.Abort() + + if not quiet: + click.echo(click.style("✨ Parsing completed successfully!", fg='green', bold=True)) + + except UnsupportedSBMLVersionError as e: + click.echo(click.style(f"❌ Unsupported SBML Version: {e}", fg='red'), err=True) + raise click.Abort() + except SBMLParsingError as e: + click.echo(click.style(f"❌ 
class BioModelsAPI:
    """Client for the BioModels Database REST API.

    Every method performs a blocking HTTP GET through ``requests``. Each
    request is bounded by ``REQUEST_TIMEOUT`` because ``requests`` waits
    indefinitely when no timeout is given, which would let an unresponsive
    server hang the caller.
    """

    BASE_URL = "https://www.ebi.ac.uk/biomodels"
    # Seconds to wait for the server before requests raises a Timeout error.
    REQUEST_TIMEOUT = 30

    def _get_json(self, url: str, params: Dict[str, Any],
                  model_id: Optional[str] = None) -> Dict[str, Any]:
        """
        GET ``url`` and return the decoded JSON body.

        Args:
            url: Absolute endpoint URL
            params: Query-string parameters
            model_id: When given, an HTTP 404 is reported as a ValueError
                naming this model instead of a generic HTTP error

        Returns:
            Decoded JSON response

        Raises:
            ValueError: On HTTP 404 when ``model_id`` is given
        """
        response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
        if model_id is not None and response.status_code == 404:
            raise ValueError(f"Model {model_id} not found in BioModels Database")
        response.raise_for_status()
        return response.json()

    def search_models(self, query: str, limit: int = 10, offset: int = 0) -> Dict[str, Any]:
        """
        Search for models in BioModels Database.

        Args:
            query: Search query (model name, author, keywords)
            limit: Maximum number of results to return (API minimum is 10)
            offset: Number of results to skip

        Returns:
            Dictionary containing search results
        """
        url = f"{self.BASE_URL}/search"
        params = {
            'query': query,
            'numResults': max(limit, 10),  # BioModels API minimum is 10
            'offset': offset,
            'format': 'json'
        }
        return self._get_json(url, params)

    def get_model_files(self, model_id: str) -> Dict[str, Any]:
        """
        Get information about files available for a model.

        Args:
            model_id: Model identifier (e.g., "BIOMD0000000012", "Malkov2020")

        Returns:
            Dictionary with file information

        Raises:
            ValueError: If the server answers 404 for ``model_id``
        """
        url = f"{self.BASE_URL}/model/files/{model_id}"
        return self._get_json(url, {'format': 'json'}, model_id=model_id)

    def download_model(self, model_id: str, output_dir: Optional[str] = None,
                       filename: Optional[str] = None) -> str:
        """
        Download SBML model file.

        Args:
            model_id: Model identifier
            output_dir: Directory to save the model. If None, saves to current directory.
            filename: Specific filename to download. If None, downloads the main SBML file.

        Returns:
            Path to downloaded file

        Raises:
            ValueError: If the model has no SBML file, or if ``filename`` has
                no usable final path component
        """
        # Set output directory
        output_path = Path(output_dir) if output_dir else Path(".")
        output_path.mkdir(parents=True, exist_ok=True)

        # Get model file information if filename not specified
        if not filename:
            model_info = self.get_model_files(model_id)

            # Find SBML file in main files; tolerate entries without a name
            sbml_files = [f for f in model_info.get('main', [])
                          if (f.get('name') or '').endswith(('.xml', '.sbml'))]

            if not sbml_files:
                raise ValueError(f"No SBML file found for model {model_id}")

            filename = sbml_files[0]['name']

        # The name can originate from the remote API or the command line.
        # Keep only its last path component so the write below can never
        # land outside ``output_path`` (e.g. "../../etc/x" or "/abs/path").
        local_name = Path(filename).name
        if local_name in ('', '.', '..'):
            raise ValueError(f"Invalid filename for model {model_id}: {filename!r}")

        # Download the file
        download_url = f"{self.BASE_URL}/model/download/{model_id}"
        params = {'filename': filename}

        print(f"Downloading {model_id}/{filename} from BioModels Database...")
        response = requests.get(download_url, params=params, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()

        # Save file
        output_file = output_path / local_name
        with open(output_file, 'wb') as f:
            f.write(response.content)

        print(f"Downloaded: {output_file}")
        return str(output_file)

    def get_model_info(self, model_id: str) -> Dict[str, Any]:
        """
        Get detailed information about a model.

        Args:
            model_id: Model identifier

        Returns:
            Dictionary with model information

        Raises:
            ValueError: If the server answers 404 for ``model_id``
        """
        url = f"{self.BASE_URL}/model/{model_id}"
        return self._get_json(url, {'format': 'json'}, model_id=model_id)
def search_biomodels(query: str, limit: int = 10) -> List[Dict[str, Any]]:
    """
    Search BioModels and return at most ``limit`` matching models.

    Builds a throwaway ``BioModelsAPI`` client for the single request.

    Args:
        query: Search query
        limit: Maximum number of results

    Returns:
        List of model information taken from the ``'models'`` entry of the
        search response (empty when that entry is absent)
    """
    # The source notes an API minimum of 10 results, so request at least
    # that many and cut the list down locally afterwards.
    fetch_size = max(limit, 10)  # API minimum is 10
    payload = BioModelsAPI().search_models(query, fetch_size)
    return payload.get('models', [])[:limit]  # Trim to requested limit
+ + Returns: + dict: Parsed model data with species, reactions, parameters, compartments + """ + try: + reader = SBMLReader() + self.document = reader.readSBML(self.file_path) + self.model = self.document.getModel() + + if self.model is None: + raise SBMLParsingError("No model found in SBML file") + + logger.info(f"Parsing SBML Level 2 Version {self.version or 'unknown'} file: {self.file_path}") + + parsed_data = { + 'sbml_info': self._get_sbml_info(), + 'species': self._parse_species(), + 'reactions': self._parse_reactions(), + 'parameters': self._parse_parameters(), + 'compartments': self._parse_compartments(), + 'rules': self._parse_rules(), + 'initial_assignments': self._parse_initial_assignments() + } + + return parsed_data + + except Exception as e: + if isinstance(e, SBMLParsingError): + raise + raise SBMLParsingError(f"Failed to parse Level 2 SBML file: {str(e)}") + + def _get_sbml_info(self): + """Extract general SBML document information.""" + return { + 'level': self.document.getLevel(), + 'version': self.document.getVersion(), + 'model_id': self.model.getId(), + 'model_name': self.model.getName(), + 'notes': self._get_notes(self.model), + 'num_species': self.model.getNumSpecies(), + 'num_reactions': self.model.getNumReactions(), + 'num_parameters': self.model.getNumParameters(), + 'num_compartments': self.model.getNumCompartments() + } + + def _parse_species(self): + """Parse species information for ODE variables.""" + species_list = [] + + for i in range(self.model.getNumSpecies()): + species = self.model.getSpecies(i) + species_data = { + 'id': species.getId(), + 'name': species.getName() if species.isSetName() else species.getId(), + 'compartment': species.getCompartment(), + 'initial_amount': species.getInitialAmount() if species.isSetInitialAmount() else None, + 'initial_concentration': species.getInitialConcentration() if species.isSetInitialConcentration() else None, + 'substance_units': species.getSubstanceUnits() if 
species.isSetSubstanceUnits() else None, + 'has_only_substance_units': species.getHasOnlySubstanceUnits(), + 'boundary_condition': species.getBoundaryCondition(), + 'constant': species.getConstant(), + 'notes': self._get_notes(species), + 'sbo_term': species.getSBOTermID() if species.isSetSBOTerm() else None + } + species_list.append(species_data) + + return species_list + + def _parse_reactions(self): + """Parse reactions and kinetic laws for ODE system.""" + reactions_list = [] + + for i in range(self.model.getNumReactions()): + reaction = self.model.getReaction(i) + + # Parse reactants + reactants = [] + for j in range(reaction.getNumReactants()): + reactant = reaction.getReactant(j) + reactants.append({ + 'species': reactant.getSpecies(), + 'stoichiometry': reactant.getStoichiometry(), + 'constant': reactant.getConstant() if hasattr(reactant, 'getConstant') else True + }) + + # Parse products + products = [] + for j in range(reaction.getNumProducts()): + product = reaction.getProduct(j) + products.append({ + 'species': product.getSpecies(), + 'stoichiometry': product.getStoichiometry(), + 'constant': product.getConstant() if hasattr(product, 'getConstant') else True + }) + + # Parse modifiers + modifiers = [] + for j in range(reaction.getNumModifiers()): + modifier = reaction.getModifier(j) + modifiers.append({ + 'species': modifier.getSpecies() + }) + + # Parse kinetic law + kinetic_law = None + if reaction.isSetKineticLaw(): + kl = reaction.getKineticLaw() + kinetic_law = { + 'formula': kl.getFormula() if kl.isSetFormula() else None, + 'math': formulaToString(kl.getMath()) if kl.isSetMath() else None, + 'parameters': self._parse_local_parameters(kl), + 'substance_units': kl.getSubstanceUnits() if kl.isSetSubstanceUnits() else None, + 'time_units': kl.getTimeUnits() if kl.isSetTimeUnits() else None + } + + reaction_data = { + 'id': reaction.getId(), + 'name': reaction.getName() if reaction.isSetName() else reaction.getId(), + 'reversible': 
reaction.getReversible(), + 'fast': reaction.getFast() if hasattr(reaction, 'getFast') else False, + 'reactants': reactants, + 'products': products, + 'modifiers': modifiers, + 'kinetic_law': kinetic_law, + 'notes': self._get_notes(reaction), + 'sbo_term': reaction.getSBOTermID() if reaction.isSetSBOTerm() else None + } + reactions_list.append(reaction_data) + + return reactions_list + + def _parse_parameters(self): + """Parse global parameters.""" + parameters_list = [] + + for i in range(self.model.getNumParameters()): + param = self.model.getParameter(i) + param_data = { + 'id': param.getId(), + 'name': param.getName() if param.isSetName() else param.getId(), + 'value': param.getValue() if param.isSetValue() else None, + 'units': param.getUnits() if param.isSetUnits() else None, + 'constant': param.getConstant(), + 'notes': self._get_notes(param), + 'sbo_term': param.getSBOTermID() if param.isSetSBOTerm() else None + } + parameters_list.append(param_data) + + return parameters_list + + def _parse_compartments(self): + """Parse compartment information.""" + compartments_list = [] + + for i in range(self.model.getNumCompartments()): + comp = self.model.getCompartment(i) + comp_data = { + 'id': comp.getId(), + 'name': comp.getName() if comp.isSetName() else comp.getId(), + 'spatial_dimensions': comp.getSpatialDimensions(), + 'size': comp.getSize() if comp.isSetSize() else None, + 'units': comp.getUnits() if comp.isSetUnits() else None, + 'constant': comp.getConstant(), + 'notes': self._get_notes(comp), + 'sbo_term': comp.getSBOTermID() if comp.isSetSBOTerm() else None + } + compartments_list.append(comp_data) + + return compartments_list + + def _parse_rules(self): + """Parse assignment, rate, and algebraic rules.""" + rules_list = [] + + for i in range(self.model.getNumRules()): + rule = self.model.getRule(i) + rule_type = rule.getTypeCode() + + rule_data = { + 'type': self._get_rule_type_name(rule_type), + 'variable': rule.getVariable() if hasattr(rule, 
'getVariable') else None, + 'formula': rule.getFormula() if rule.isSetFormula() else None, + 'math': formulaToString(rule.getMath()) if rule.isSetMath() else None, + 'notes': self._get_notes(rule), + 'sbo_term': rule.getSBOTermID() if rule.isSetSBOTerm() else None + } + rules_list.append(rule_data) + + return rules_list + + def _parse_initial_assignments(self): + """Parse initial assignments (Level 2 Version 2+).""" + assignments_list = [] + + if hasattr(self.model, 'getNumInitialAssignments'): + for i in range(self.model.getNumInitialAssignments()): + assignment = self.model.getInitialAssignment(i) + assign_data = { + 'symbol': assignment.getSymbol(), + 'formula': assignment.getFormula() if assignment.isSetFormula() else None, + 'math': formulaToString(assignment.getMath()) if assignment.isSetMath() else None, + 'notes': self._get_notes(assignment), + 'sbo_term': assignment.getSBOTermID() if assignment.isSetSBOTerm() else None + } + assignments_list.append(assign_data) + + return assignments_list + + def _parse_local_parameters(self, kinetic_law): + """Parse local parameters within kinetic laws.""" + local_params = [] + + for i in range(kinetic_law.getNumParameters()): + param = kinetic_law.getParameter(i) + param_data = { + 'id': param.getId(), + 'name': param.getName() if param.isSetName() else param.getId(), + 'value': param.getValue() if param.isSetValue() else None, + 'units': param.getUnits() if param.isSetUnits() else None, + 'notes': self._get_notes(param), + 'sbo_term': param.getSBOTermID() if param.isSetSBOTerm() else None + } + local_params.append(param_data) + + return local_params + + def _get_notes(self, element): + """Extract notes/annotations from SBML element.""" + if element.isSetNotes(): + return element.getNotesString() + return None + + def _get_rule_type_name(self, type_code): + """Convert rule type code to readable name.""" + type_names = { + 1: 'assignment', # SBML_ASSIGNMENT_RULE + 2: 'rate', # SBML_RATE_RULE + 3: 'algebraic' # 
SBML_ALGEBRAIC_RULE + } + return type_names.get(type_code, 'unknown') diff --git a/simba_ml/sbml_parser/level_3/parser.py b/simba_ml/sbml_parser/level_3/parser.py new file mode 100644 index 0000000..c6c98b1 --- /dev/null +++ b/simba_ml/sbml_parser/level_3/parser.py @@ -0,0 +1,395 @@ +from libsbml import SBMLReader, formulaToString +import logging +from ..main_parser import SBMLParsingError + +logger = logging.getLogger(__name__) + +class Parser: + """ + Parser for SBML Level 3 models (versions 1 and 2). + Enhanced parser supporting Level 3 features like conversionFactors and extensions. + """ + + def __init__(self, file_path, level=3, version=None): + self.file_path = file_path + self.level = level + self.version = version + self.model = None + self.document = None + + def parse(self): + """ + Parse SBML Level 3 file and extract ODE model components. + + Returns: + dict: Parsed model data with species, reactions, parameters, compartments + """ + try: + reader = SBMLReader() + self.document = reader.readSBML(self.file_path) + self.model = self.document.getModel() + + if self.model is None: + raise SBMLParsingError("No model found in SBML file") + + logger.info(f"Parsing SBML Level 3 Version {self.version or 'unknown'} file: {self.file_path}") + + parsed_data = { + 'sbml_info': self._get_sbml_info(), + 'species': self._parse_species(), + 'reactions': self._parse_reactions(), + 'parameters': self._parse_parameters(), + 'compartments': self._parse_compartments(), + 'rules': self._parse_rules(), + 'initial_assignments': self._parse_initial_assignments(), + 'events': self._parse_events(), + 'constraints': self._parse_constraints(), + 'unit_definitions': self._parse_unit_definitions(), + 'function_definitions': self._parse_function_definitions() + } + + return parsed_data + + except Exception as e: + if isinstance(e, SBMLParsingError): + raise + raise SBMLParsingError(f"Failed to parse Level 3 SBML file: {str(e)}") + + def _get_sbml_info(self): + """Extract general SBML 
document information.""" + return { + 'level': self.document.getLevel(), + 'version': self.document.getVersion(), + 'model_id': self.model.getId(), + 'model_name': self.model.getName(), + 'substance_units': self.model.getSubstanceUnits() if self.model.isSetSubstanceUnits() else None, + 'time_units': self.model.getTimeUnits() if self.model.isSetTimeUnits() else None, + 'volume_units': self.model.getVolumeUnits() if self.model.isSetVolumeUnits() else None, + 'area_units': self.model.getAreaUnits() if self.model.isSetAreaUnits() else None, + 'length_units': self.model.getLengthUnits() if self.model.isSetLengthUnits() else None, + 'extent_units': self.model.getExtentUnits() if self.model.isSetExtentUnits() else None, + 'conversion_factor': self.model.getConversionFactor() if self.model.isSetConversionFactor() else None, + 'notes': self._get_notes(self.model), + 'num_species': self.model.getNumSpecies(), + 'num_reactions': self.model.getNumReactions(), + 'num_parameters': self.model.getNumParameters(), + 'num_compartments': self.model.getNumCompartments(), + 'num_events': self.model.getNumEvents() if hasattr(self.model, 'getNumEvents') else 0, + 'num_constraints': self.model.getNumConstraints() if hasattr(self.model, 'getNumConstraints') else 0 + } + + def _parse_species(self): + """Parse species information with Level 3 enhancements.""" + species_list = [] + + for i in range(self.model.getNumSpecies()): + species = self.model.getSpecies(i) + species_data = { + 'id': species.getId(), + 'name': species.getName() if species.isSetName() else species.getId(), + 'compartment': species.getCompartment(), + 'initial_amount': species.getInitialAmount() if species.isSetInitialAmount() else None, + 'initial_concentration': species.getInitialConcentration() if species.isSetInitialConcentration() else None, + 'substance_units': species.getSubstanceUnits() if species.isSetSubstanceUnits() else None, + 'has_only_substance_units': species.getHasOnlySubstanceUnits(), + 
'boundary_condition': species.getBoundaryCondition(), + 'constant': species.getConstant(), + 'conversion_factor': species.getConversionFactor() if species.isSetConversionFactor() else None, + 'notes': self._get_notes(species), + 'sbo_term': species.getSBOTermID() if species.isSetSBOTerm() else None, + 'metaid': species.getMetaId() if species.isSetMetaId() else None + } + species_list.append(species_data) + + return species_list + + def _parse_reactions(self): + """Parse reactions with Level 3 features.""" + reactions_list = [] + + for i in range(self.model.getNumReactions()): + reaction = self.model.getReaction(i) + + # Parse reactants + reactants = [] + for j in range(reaction.getNumReactants()): + reactant = reaction.getReactant(j) + reactants.append({ + 'species': reactant.getSpecies(), + 'stoichiometry': reactant.getStoichiometry(), + 'constant': reactant.getConstant() + }) + + # Parse products + products = [] + for j in range(reaction.getNumProducts()): + product = reaction.getProduct(j) + products.append({ + 'species': product.getSpecies(), + 'stoichiometry': product.getStoichiometry(), + 'constant': product.getConstant() + }) + + # Parse modifiers + modifiers = [] + for j in range(reaction.getNumModifiers()): + modifier = reaction.getModifier(j) + modifiers.append({ + 'species': modifier.getSpecies() + }) + + # Parse kinetic law + kinetic_law = None + if reaction.isSetKineticLaw(): + kl = reaction.getKineticLaw() + kinetic_law = { + 'formula': kl.getFormula() if kl.isSetFormula() else None, + 'math': formulaToString(kl.getMath()) if kl.isSetMath() else None, + 'parameters': self._parse_local_parameters(kl) + } + + reaction_data = { + 'id': reaction.getId(), + 'name': reaction.getName() if reaction.isSetName() else reaction.getId(), + 'reversible': reaction.getReversible(), + 'compartment': reaction.getCompartment() if reaction.isSetCompartment() else None, + 'reactants': reactants, + 'products': products, + 'modifiers': modifiers, + 'kinetic_law': 
kinetic_law, + 'notes': self._get_notes(reaction), + 'sbo_term': reaction.getSBOTermID() if reaction.isSetSBOTerm() else None, + 'metaid': reaction.getMetaId() if reaction.isSetMetaId() else None + } + reactions_list.append(reaction_data) + + return reactions_list + + def _parse_parameters(self): + """Parse global parameters.""" + parameters_list = [] + + for i in range(self.model.getNumParameters()): + param = self.model.getParameter(i) + param_data = { + 'id': param.getId(), + 'name': param.getName() if param.isSetName() else param.getId(), + 'value': param.getValue() if param.isSetValue() else None, + 'units': param.getUnits() if param.isSetUnits() else None, + 'constant': param.getConstant(), + 'notes': self._get_notes(param), + 'sbo_term': param.getSBOTermID() if param.isSetSBOTerm() else None, + 'metaid': param.getMetaId() if param.isSetMetaId() else None + } + parameters_list.append(param_data) + + return parameters_list + + def _parse_compartments(self): + """Parse compartment information with Level 3 features.""" + compartments_list = [] + + for i in range(self.model.getNumCompartments()): + comp = self.model.getCompartment(i) + comp_data = { + 'id': comp.getId(), + 'name': comp.getName() if comp.isSetName() else comp.getId(), + 'spatial_dimensions': comp.getSpatialDimensions(), + 'size': comp.getSize() if comp.isSetSize() else None, + 'units': comp.getUnits() if comp.isSetUnits() else None, + 'constant': comp.getConstant(), + 'notes': self._get_notes(comp), + 'sbo_term': comp.getSBOTermID() if comp.isSetSBOTerm() else None, + 'metaid': comp.getMetaId() if comp.isSetMetaId() else None + } + compartments_list.append(comp_data) + + return compartments_list + + def _parse_rules(self): + """Parse assignment, rate, and algebraic rules.""" + rules_list = [] + + for i in range(self.model.getNumRules()): + rule = self.model.getRule(i) + rule_type = rule.getTypeCode() + + rule_data = { + 'type': self._get_rule_type_name(rule_type), + 'variable': rule.getVariable() 
if hasattr(rule, 'getVariable') else None, + 'formula': rule.getFormula() if rule.isSetFormula() else None, + 'math': formulaToString(rule.getMath()) if rule.isSetMath() else None, + 'notes': self._get_notes(rule), + 'sbo_term': rule.getSBOTermID() if rule.isSetSBOTerm() else None, + 'metaid': rule.getMetaId() if rule.isSetMetaId() else None + } + rules_list.append(rule_data) + + return rules_list + + def _parse_initial_assignments(self): + """Parse initial assignments.""" + assignments_list = [] + + for i in range(self.model.getNumInitialAssignments()): + assignment = self.model.getInitialAssignment(i) + assign_data = { + 'symbol': assignment.getSymbol(), + 'formula': formulaToString(assignment.getMath()) if assignment.isSetMath() else None, + 'math': formulaToString(assignment.getMath()) if assignment.isSetMath() else None, + 'notes': self._get_notes(assignment), + 'sbo_term': assignment.getSBOTermID() if assignment.isSetSBOTerm() else None, + 'metaid': assignment.getMetaId() if assignment.isSetMetaId() else None + } + assignments_list.append(assign_data) + + return assignments_list + + def _parse_events(self): + """Parse events (Level 2 Version 2+, Level 3).""" + events_list = [] + + if hasattr(self.model, 'getNumEvents'): + for i in range(self.model.getNumEvents()): + event = self.model.getEvent(i) + + # Parse trigger + trigger_data = None + if event.isSetTrigger(): + trigger = event.getTrigger() + trigger_data = { + 'formula': trigger.getFormula() if trigger.isSetFormula() else None, + 'math': formulaToString(trigger.getMath()) if trigger.isSetMath() else None, + 'initial_value': trigger.getInitialValue() if hasattr(trigger, 'getInitialValue') else None, + 'persistent': trigger.getPersistent() if hasattr(trigger, 'getPersistent') else None + } + + # Parse delay + delay_data = None + if event.isSetDelay(): + delay = event.getDelay() + delay_data = { + 'formula': delay.getFormula() if delay.isSetFormula() else None, + 'math': formulaToString(delay.getMath()) if 
delay.isSetMath() else None + } + + # Parse event assignments + assignments = [] + for j in range(event.getNumEventAssignments()): + ea = event.getEventAssignment(j) + assignments.append({ + 'variable': ea.getVariable(), + 'formula': ea.getFormula() if ea.isSetFormula() else None, + 'math': formulaToString(ea.getMath()) if ea.isSetMath() else None + }) + + event_data = { + 'id': event.getId() if event.isSetId() else None, + 'name': event.getName() if event.isSetName() else None, + 'use_values_from_trigger_time': event.getUseValuesFromTriggerTime() if hasattr(event, 'getUseValuesFromTriggerTime') else None, + 'trigger': trigger_data, + 'delay': delay_data, + 'event_assignments': assignments, + 'notes': self._get_notes(event), + 'sbo_term': event.getSBOTermID() if event.isSetSBOTerm() else None + } + events_list.append(event_data) + + return events_list + + def _parse_constraints(self): + """Parse constraints (Level 2 Version 2+, Level 3).""" + constraints_list = [] + + if hasattr(self.model, 'getNumConstraints'): + for i in range(self.model.getNumConstraints()): + constraint = self.model.getConstraint(i) + constraint_data = { + 'formula': constraint.getFormula() if constraint.isSetFormula() else None, + 'math': formulaToString(constraint.getMath()) if constraint.isSetMath() else None, + 'message': constraint.getMessageString() if constraint.isSetMessage() else None, + 'notes': self._get_notes(constraint), + 'sbo_term': constraint.getSBOTermID() if constraint.isSetSBOTerm() else None + } + constraints_list.append(constraint_data) + + return constraints_list + + def _parse_unit_definitions(self): + """Parse unit definitions.""" + unit_defs = [] + + for i in range(self.model.getNumUnitDefinitions()): + unit_def = self.model.getUnitDefinition(i) + + units = [] + for j in range(unit_def.getNumUnits()): + unit = unit_def.getUnit(j) + units.append({ + 'kind': unit.getKind(), + 'exponent': unit.getExponent(), + 'scale': unit.getScale(), + 'multiplier': unit.getMultiplier() 
+ }) + + unit_def_data = { + 'id': unit_def.getId(), + 'name': unit_def.getName() if unit_def.isSetName() else unit_def.getId(), + 'units': units, + 'notes': self._get_notes(unit_def), + 'sbo_term': unit_def.getSBOTermID() if unit_def.isSetSBOTerm() else None + } + unit_defs.append(unit_def_data) + + return unit_defs + + def _parse_function_definitions(self): + """Parse function definitions.""" + function_defs = [] + + for i in range(self.model.getNumFunctionDefinitions()): + func_def = self.model.getFunctionDefinition(i) + func_data = { + 'id': func_def.getId(), + 'name': func_def.getName() if func_def.isSetName() else func_def.getId(), + 'formula': formulaToString(func_def.getMath()) if func_def.isSetMath() else None, + 'math': formulaToString(func_def.getMath()) if func_def.isSetMath() else None, + 'notes': self._get_notes(func_def), + 'sbo_term': func_def.getSBOTermID() if func_def.isSetSBOTerm() else None + } + function_defs.append(func_data) + + return function_defs + + def _parse_local_parameters(self, kinetic_law): + """Parse local parameters within kinetic laws.""" + local_params = [] + + for i in range(kinetic_law.getNumLocalParameters()): + param = kinetic_law.getLocalParameter(i) + param_data = { + 'id': param.getId(), + 'name': param.getName() if param.isSetName() else param.getId(), + 'value': param.getValue() if param.isSetValue() else None, + 'units': param.getUnits() if param.isSetUnits() else None, + 'notes': self._get_notes(param), + 'sbo_term': param.getSBOTermID() if param.isSetSBOTerm() else None + } + local_params.append(param_data) + + return local_params + + def _get_notes(self, element): + """Extract notes/annotations from SBML element.""" + if element.isSetNotes(): + return element.getNotesString() + return None + + def _get_rule_type_name(self, type_code): + """Convert rule type code to readable name.""" + type_names = { + 1: 'assignment', # SBML_ASSIGNMENT_RULE + 2: 'rate', # SBML_RATE_RULE + 3: 'algebraic' # SBML_ALGEBRAIC_RULE + } + 
return type_names.get(type_code, 'unknown') diff --git a/simba_ml/sbml_parser/main_parser.py b/simba_ml/sbml_parser/main_parser.py new file mode 100644 index 0000000..65a5222 --- /dev/null +++ b/simba_ml/sbml_parser/main_parser.py @@ -0,0 +1,163 @@ +from libsbml import SBMLReader +import logging + + +class SBMLParsingError(Exception): + """Raised when SBML file cannot be parsed or contains errors.""" + pass + + +class UnsupportedSBMLVersionError(Exception): + """Raised when SBML level/version combination is not supported.""" + pass + +logger = logging.getLogger(__name__) + +class MainSBMLParser: + """ + Main SBML parser that detects SBML level/version and routes to appropriate parser. + + Supports commonly used SBML versions for ODE models: + - Level 2: Version 4, 5 + - Level 3: Version 1, 2 + """ + + # Define supported SBML level/version combinations + SUPPORTED_VERSIONS = { + (2, 4): "level_2.parser", + (2, 5): "level_2.parser", + (3, 1): "level_3.parser", + (3, 2): "level_3.parser" + } + + def __init__(self, file_path): + self.file_path = file_path + self.level = None + self.version = None + self.model = None + + def detect_version_and_level(self): + """ + Parse SBML file to detect level and version. 
+ + Returns: + tuple: (level, version, model) from the SBML document + + Raises: + SBMLParsingError: If file cannot be parsed or contains errors + """ + try: + reader = SBMLReader() + document = reader.readSBML(self.file_path) + + if document.getNumErrors() > 0: + error_messages = [] + for i in range(document.getNumErrors()): + error_messages.append(document.getError(i).getMessage()) + logger.error(f"SBML parsing errors: {'; '.join(error_messages)}") + raise SBMLParsingError(f"Error reading SBML file: {'; '.join(error_messages)}") + + model = document.getModel() + if model is None: + raise SBMLParsingError("No model found in SBML file.") + + level = document.getLevel() + version = document.getVersion() + + self.level = level + self.version = version + self.model = model + + logger.info(f"Detected SBML Level {level}, Version {version}") + return level, version, model + + except Exception as e: + if isinstance(e, (SBMLParsingError, UnsupportedSBMLVersionError)): + raise + raise SBMLParsingError(f"Failed to parse SBML file '{self.file_path}': {str(e)}") + + def validate_ode_model(self, model): + """ + Validate that the SBML model represents an ODE system. + + Args: + model: SBML model object + + Raises: + SBMLParsingError: If model doesn't appear to be ODE-based + """ + if model.getListOfReactions().size() == 0: + logger.warning("No reactions found - this may not be a dynamic ODE model") + + # Check for basic ODE model requirements + has_kinetic_laws = False + for reaction in model.getListOfReactions(): + if reaction.getKineticLaw() is not None: + has_kinetic_laws = True + break + + if not has_kinetic_laws and model.getListOfReactions().size() > 0: + logger.warning("Reactions found but no kinetic laws - this may not be suitable for ODE simulation") + + def get_parser_module(self, level, version): + """ + Get the appropriate parser module for the given level/version. 
+ + Args: + level: SBML level + version: SBML version + + Returns: + str: Module path for the parser + + Raises: + UnsupportedSBMLVersionError: If level/version combination is not supported + """ + if (level, version) not in self.SUPPORTED_VERSIONS: + supported_versions = [f"Level {l} Version {v}" for l, v in self.SUPPORTED_VERSIONS.keys()] + raise UnsupportedSBMLVersionError( + f"SBML Level {level} Version {version} is not supported. " + f"Supported versions: {', '.join(supported_versions)}" + ) + + return self.SUPPORTED_VERSIONS[(level, version)] + + def process(self): + """ + Main processing method that detects version and delegates to appropriate parser. + + Returns: + Parsed model data structure + + Raises: + UnsupportedSBMLVersionError: If SBML version is not supported + SBMLParsingError: If parsing fails + """ + level, version, model = self.detect_version_and_level() + + # Validate ODE model characteristics + self.validate_ode_model(model) + + # Get and instantiate the appropriate parser + parser_module_path = self.get_parser_module(level, version) + + try: + if parser_module_path == "level_2.parser": + from .level_2.parser import Parser as VersionParser + elif parser_module_path == "level_3.parser": + from .level_3.parser import Parser as VersionParser + else: + raise ImportError(f"Unknown parser module: {parser_module_path}") + + parser = VersionParser(self.file_path, level, version) + parsed_data = parser.parse() + + # Add metadata with file path for units parsing + if 'metadata' not in parsed_data: + parsed_data['metadata'] = {} + parsed_data['metadata']['sbml_file_path'] = self.file_path + + return parsed_data + + except ImportError as e: + raise SBMLParsingError(f"Failed to import parser for Level {level} Version {version}: {str(e)}") diff --git a/simba_ml/sbml_parser/ml_exporter.py b/simba_ml/sbml_parser/ml_exporter.py new file mode 100644 index 0000000..85c96cd --- /dev/null +++ b/simba_ml/sbml_parser/ml_exporter.py @@ -0,0 +1,637 @@ +""" +ML Data 
Exporter for SBML models. + +Converts parsed SBML data into machine learning-ready formats including: +- Structured DataFrames for species, reactions, parameters +- Network matrices (stoichiometry, adjacency) +- Feature vectors for ML training +- Export to various formats (CSV, JSON, NumPy, etc.) +""" + +import pandas as pd +import numpy as np +import json +import libsbml +from typing import Dict, List, Tuple, Optional, Any +from pathlib import Path + + +class SBMLMLExporter: + """Export SBML parsed data in machine learning-ready formats.""" + + def __init__(self, parsed_data: Dict[str, Any]): + """ + Initialize with parsed SBML data. + + Args: + parsed_data: Output from MainSBMLParser.process() + """ + self.data = parsed_data + self.sbml_info = parsed_data['sbml_info'] + self.species = parsed_data['species'] + self.reactions = parsed_data['reactions'] + self.parameters = parsed_data['parameters'] + self.compartments = parsed_data['compartments'] + + # Process species to separate dynamic from boundary + self._process_species_types() + self._parse_units_system() + self._normalize_species_units() + + def _process_species_types(self): + """Separate dynamic species from boundary/constant species.""" + self.dynamic_species = [] + self.boundary_species = [] + + for sp in self.species: + is_boundary = sp.get('boundary_condition', False) + is_constant = sp.get('constant', False) + + if is_boundary or is_constant: + self.boundary_species.append(sp) + else: + self.dynamic_species.append(sp) + + def _parse_units_system(self): + """Parse SBML units system using libSBML.""" + # Initialize units info based on SBML Level + level = self.sbml_info['level'] + + if level == 2: + # Level 2: Use SBML specification defaults + self.units_info = { + 'substance_unit': 'mole', # Official SBML Level 2 default + 'time_unit': 'second', # Official SBML Level 2 default + 'volume_unit': 'litre', # Official SBML Level 2 default + 'substance_multiplier': 1.0, + 'time_multiplier': 1.0 + } + elif 
level == 3: + # Level 3: No defaults, all units must be explicitly defined + self.units_info = { + 'substance_unit': None, # Must be explicitly defined + 'time_unit': None, # Must be explicitly defined + 'volume_unit': None, # Must be explicitly defined + 'substance_multiplier': 1.0, + 'time_multiplier': 1.0 + } + else: + # Fallback for other levels + self.units_info = { + 'substance_unit': None, + 'time_unit': None, + 'volume_unit': None, + 'substance_multiplier': 1.0, + 'time_multiplier': 1.0 + } + + # We need to re-parse with libSBML to get units info + # This is necessary because the main parser doesn't extract unit definitions + if 'sbml_file_path' in self.data.get('metadata', {}): + file_path = self.data['metadata']['sbml_file_path'] + else: + # If file path not available, we'll work with defaults + return + + try: + reader = libsbml.SBMLReader() + doc = reader.readSBML(file_path) + model = doc.getModel() + + # Parse unit definitions + self._extract_unit_definitions(model) + + except Exception as e: + # If units parsing fails, use defaults + pass + + def _extract_unit_definitions(self, model): + """Extract unit definitions from libSBML model.""" + # Check for custom unit definitions + for i in range(model.getNumUnitDefinitions()): + unit_def = model.getUnitDefinition(i) + unit_id = unit_def.getId() + + if unit_id in ['substance', 'time', 'volume']: + # Parse the unit definition + if unit_def.getNumUnits() > 0: + unit = unit_def.getUnit(0) # Take first unit + kind = libsbml.UnitKind_toString(unit.getKind()) + scale = unit.getScale() + multiplier = unit.getMultiplier() + + # Calculate actual multiplier: multiplier * 10^scale + actual_multiplier = multiplier * (10 ** scale) + + if unit_id == 'substance': + self.units_info['substance_unit'] = kind + self.units_info['substance_multiplier'] = actual_multiplier + elif unit_id == 'time': + self.units_info['time_unit'] = kind + self.units_info['time_multiplier'] = actual_multiplier + elif unit_id == 'volume': + 
self.units_info['volume_unit'] = kind + + # Set model-level units if specified + level = self.sbml_info['level'] + + if model.isSetSubstanceUnits(): + substance_unit_ref = model.getSubstanceUnits() + # For Level 3, this should reference a unit definition + if level == 3 and substance_unit_ref in ['substance'] and self.units_info['substance_unit']: + # Keep the parsed unit definition + pass + else: + self.units_info['substance_unit'] = substance_unit_ref + + if model.isSetTimeUnits(): + time_unit_ref = model.getTimeUnits() + # For Level 3, this should reference a unit definition + if level == 3 and time_unit_ref in ['time'] and self.units_info['time_unit']: + # Keep the parsed unit definition + pass + else: + self.units_info['time_unit'] = time_unit_ref + + if model.isSetVolumeUnits(): + volume_unit_ref = model.getVolumeUnits() + # For Level 3, this should reference a unit definition + if level == 3 and volume_unit_ref in ['volume'] and self.units_info['volume_unit']: + # Keep the parsed unit definition + pass + else: + self.units_info['volume_unit'] = volume_unit_ref + + # For Level 3, validate that all required units are explicitly defined + if level == 3: + self._validate_level3_units() + + def _validate_level3_units(self): + """Validate that Level 3 models have required units explicitly defined.""" + import logging + logger = logging.getLogger(__name__) + + missing_units = [] + + # Check if model actually needs these units + needs_substance = self._model_uses_concentrations_or_amounts() + needs_time = self._model_has_kinetic_laws() + needs_volume = self._model_uses_concentrations() + + if needs_substance and self.units_info['substance_unit'] is None: + missing_units.append('substance') + if needs_time and self.units_info['time_unit'] is None: + missing_units.append('time') + if needs_volume and self.units_info['volume_unit'] is None: + missing_units.append('volume') + + if missing_units: + error_msg = (f"SBML Level 3 model missing required unit definitions: {', 
'.join(missing_units)}. " + f"Level 3 specification requires all used units to be explicitly defined.") + logger.error(error_msg) + raise ValueError(error_msg) + + def _model_uses_concentrations_or_amounts(self) -> bool: + """Check if model uses species amounts or concentrations.""" + return any(sp.get('initial_concentration') is not None or + sp.get('initial_amount') is not None + for sp in self.species) + + def _model_has_kinetic_laws(self) -> bool: + """Check if model has kinetic laws (needs time units).""" + return any(rxn.get('kinetic_law') is not None for rxn in self.reactions) + + def _model_uses_concentrations(self) -> bool: + """Check if model uses concentrations (needs volume units).""" + return any(sp.get('initial_concentration') is not None for sp in self.species) + + + def _get_compartment_size(self, compartment_id: str) -> float: + """Get size of a compartment by ID.""" + for comp in self.compartments: + if comp['id'] == compartment_id: + return comp.get('size', 1.0) + return 1.0 # Default size if not found + + def _normalize_species_units(self): + """Convert all species to concentration units for consistent ODE formulation.""" + for sp in self.species: + compartment_size = self._get_compartment_size(sp['compartment']) + + # Convert to concentration if needed + if sp.get('initial_concentration') is not None: + # Already in concentration units + sp['normalized_concentration'] = sp['initial_concentration'] + sp['units_type'] = 'concentration' + + elif sp.get('initial_amount') is not None: + # Convert amount to concentration: [X] = amount / volume + sp['normalized_concentration'] = sp['initial_amount'] / compartment_size + sp['units_type'] = 'amount_converted' + + else: + # No initial condition specified + sp['normalized_concentration'] = 0.0 + sp['units_type'] = 'default' + + # Add units information from parsed SBML + sp['substance_unit'] = self.units_info['substance_unit'] + sp['substance_multiplier'] = self.units_info['substance_multiplier'] + 
sp['time_unit'] = self.units_info['time_unit'] + sp['time_multiplier'] = self.units_info['time_multiplier'] + sp['volume_unit'] = self.units_info['volume_unit'] + + def get_dynamic_species_concentrations(self) -> Tuple[np.ndarray, List[str]]: + """ + Get normalized initial concentrations for dynamic species only. + + Returns: + tuple: (concentrations_array, species_ids) + - concentrations_array: Initial concentrations for ODE system + - species_ids: Corresponding species identifiers + """ + concentrations = [] + species_ids = [] + + for sp in self.dynamic_species: + concentrations.append(sp['normalized_concentration']) + species_ids.append(sp['id']) + + return np.array(concentrations), species_ids + + def get_boundary_species_info(self) -> List[Dict[str, Any]]: + """ + Get information about boundary/constant species. + + Returns: + list: Information about boundary species that remain constant + """ + boundary_info = [] + for sp in self.boundary_species: + boundary_info.append({ + 'id': sp['id'], + 'concentration': sp['normalized_concentration'], + 'boundary_condition': sp.get('boundary_condition', False), + 'constant': sp.get('constant', False) + }) + return boundary_info + + def to_dataframes(self) -> Dict[str, pd.DataFrame]: + """ + Convert parsed data to pandas DataFrames. 
+ + Returns: + dict: DataFrames for different components + """ + dataframes = {} + + # Species DataFrame + if self.species: + species_data = [] + for sp in self.species: + species_data.append({ + 'species_id': sp['id'], + 'name': sp['name'], + 'compartment': sp['compartment'], + 'initial_concentration': sp.get('initial_concentration'), + 'initial_amount': sp.get('initial_amount'), + 'normalized_concentration': sp.get('normalized_concentration'), + 'units_type': sp.get('units_type'), + 'substance_unit': sp.get('substance_unit'), + 'substance_multiplier': sp.get('substance_multiplier'), + 'time_unit': sp.get('time_unit'), + 'time_multiplier': sp.get('time_multiplier'), + 'volume_unit': sp.get('volume_unit'), + 'boundary_condition': sp.get('boundary_condition', False), + 'constant': sp.get('constant', False), + 'has_only_substance_units': sp.get('has_only_substance_units', False), + 'is_dynamic': not (sp.get('boundary_condition', False) or sp.get('constant', False)) + }) + dataframes['species'] = pd.DataFrame(species_data) + + # Reactions DataFrame + if self.reactions: + reaction_data = [] + for rxn in self.reactions: + # Basic reaction info + rxn_row = { + 'reaction_id': rxn['id'], + 'name': rxn['name'], + 'reversible': rxn.get('reversible', False), + 'fast': rxn.get('fast', False), + 'has_kinetic_law': rxn.get('kinetic_law') is not None, + 'num_reactants': len(rxn.get('reactants', [])), + 'num_products': len(rxn.get('products', [])), + 'num_modifiers': len(rxn.get('modifiers', [])) + } + + # Add kinetic law info if available + if rxn.get('kinetic_law'): + kl = rxn['kinetic_law'] + rxn_row.update({ + 'kinetic_formula': kl.get('formula'), + 'kinetic_math': kl.get('math'), + 'num_local_parameters': len(kl.get('parameters', [])) + }) + + reaction_data.append(rxn_row) + dataframes['reactions'] = pd.DataFrame(reaction_data) + + # Parameters DataFrame + if self.parameters: + param_data = [] + for param in self.parameters: + param_data.append({ + 'parameter_id': 
param['id'], + 'name': param['name'], + 'value': param.get('value'), + 'units': param.get('units'), + 'constant': param.get('constant', True) + }) + dataframes['parameters'] = pd.DataFrame(param_data) + + # Compartments DataFrame + if self.compartments: + comp_data = [] + for comp in self.compartments: + comp_data.append({ + 'compartment_id': comp['id'], + 'name': comp['name'], + 'size': comp.get('size'), + 'spatial_dimensions': comp.get('spatial_dimensions'), + 'constant': comp.get('constant', True) + }) + dataframes['compartments'] = pd.DataFrame(comp_data) + + return dataframes + + def get_stoichiometry_matrix(self, dynamic_only: bool = True) -> Tuple[np.ndarray, List[str], List[str]]: + """ + Create stoichiometry matrix for the reaction network. + + Args: + dynamic_only: If True, only include non-boundary, non-constant species + + Returns: + tuple: (matrix, species_ids, reaction_ids) + - matrix: shape (n_dynamic_species, n_reactions) or (n_species, n_reactions) + - species_ids: list of species identifiers + - reaction_ids: list of reaction identifiers + """ + if dynamic_only: + species_list = self.dynamic_species + species_ids = [sp['id'] for sp in species_list] + else: + species_list = self.species + species_ids = [sp['id'] for sp in species_list] + + reaction_ids = [rxn['id'] for rxn in self.reactions] + + # Create species index mapping + species_idx = {sp_id: i for i, sp_id in enumerate(species_ids)} + + # Initialize stoichiometry matrix + S = np.zeros((len(species_ids), len(reaction_ids))) + + for j, reaction in enumerate(self.reactions): + # Add reactants (negative stoichiometry) + for reactant in reaction.get('reactants', []): + sp_id = reactant['species'] + if sp_id in species_idx: # Only include if in our species list + i = species_idx[sp_id] + stoich = reactant.get('stoichiometry', 1.0) + S[i, j] -= stoich + + # Add products (positive stoichiometry) + for product in reaction.get('products', []): + sp_id = product['species'] + if sp_id in species_idx: # 
Only include if in our species list + i = species_idx[sp_id] + stoich = product.get('stoichiometry', 1.0) + S[i, j] += stoich + + return S, species_ids, reaction_ids + + def get_adjacency_matrix(self, include_modifiers: bool = True) -> Tuple[np.ndarray, List[str]]: + """ + Create adjacency matrix representing species-species interactions. + + Args: + include_modifiers: Whether to include modifier relationships + + Returns: + tuple: (adjacency_matrix, species_ids) + - adjacency_matrix: shape (n_species, n_species) + - species_ids: list of species identifiers + """ + species_ids = [sp['id'] for sp in self.species] + species_idx = {sp_id: i for i, sp_id in enumerate(species_ids)} + + # Initialize adjacency matrix + A = np.zeros((len(species_ids), len(species_ids))) + + for reaction in self.reactions: + reactant_ids = [r['species'] for r in reaction.get('reactants', [])] + product_ids = [p['species'] for p in reaction.get('products', [])] + modifier_ids = [m['species'] for m in reaction.get('modifiers', [])] if include_modifiers else [] + + # Reactants to products + for reactant_id in reactant_ids: + for product_id in product_ids: + if reactant_id in species_idx and product_id in species_idx: + i, j = species_idx[reactant_id], species_idx[product_id] + A[i, j] = 1 + + # Modifiers to products (regulatory interactions) + if include_modifiers: + for modifier_id in modifier_ids: + for product_id in product_ids: + if modifier_id in species_idx and product_id in species_idx: + i, j = species_idx[modifier_id], species_idx[product_id] + A[i, j] = 1 + + return A, species_ids + + def get_feature_vectors(self) -> Dict[str, np.ndarray]: + """ + Extract feature vectors for ML training. 
+ + Returns: + dict: Feature vectors for different components + """ + features = {} + + # Species features + if self.species: + species_features = [] + for sp in self.species: + feat = [ + float(sp.get('initial_concentration') or sp.get('initial_amount') or 0), + float(sp.get('boundary_condition', False)), + float(sp.get('constant', False)), + float(sp.get('has_only_substance_units', False)) + ] + species_features.append(feat) + features['species'] = np.array(species_features) + + # Reaction features + if self.reactions: + reaction_features = [] + for rxn in self.reactions: + feat = [ + float(rxn.get('reversible', False)), + float(rxn.get('fast', False)), + float(rxn.get('kinetic_law') is not None), + float(len(rxn.get('reactants', []))), + float(len(rxn.get('products', []))), + float(len(rxn.get('modifiers', []))) + ] + reaction_features.append(feat) + features['reactions'] = np.array(reaction_features) + + # Network topology features + if self.species and self.reactions: + S, _, _ = self.get_stoichiometry_matrix() + A, _ = self.get_adjacency_matrix() + + # Network-level features + network_features = [ + len(self.species), # Number of species + len(self.reactions), # Number of reactions + np.count_nonzero(S), # Number of non-zero stoichiometry entries + np.count_nonzero(A), # Number of edges in adjacency graph + np.mean(np.sum(np.abs(S), axis=1)), # Average species degree + np.mean(np.sum(A, axis=1)), # Average adjacency degree + ] + features['network'] = np.array(network_features) + + return features + + def get_ml_dataset(self) -> Dict[str, Any]: + """ + Get comprehensive ML-ready dataset. 
+ + Returns: + dict: Complete dataset with matrices, features, and metadata + """ + dataset = { + 'metadata': { + 'sbml_level': self.sbml_info['level'], + 'sbml_version': self.sbml_info['version'], + 'model_id': self.sbml_info['model_id'], + 'model_name': self.sbml_info['model_name'], + 'num_species': len(self.species), + 'num_reactions': len(self.reactions), + 'num_parameters': len(self.parameters), + 'has_kinetic_laws': any(r.get('kinetic_law') for r in self.reactions) + } + } + + # Add matrices + if self.species and self.reactions: + S, species_ids, reaction_ids = self.get_stoichiometry_matrix() + A, _ = self.get_adjacency_matrix() + + dataset['matrices'] = { + 'stoichiometry': S, + 'adjacency': A, + 'species_ids': species_ids, + 'reaction_ids': reaction_ids + } + + # Add feature vectors + dataset['features'] = self.get_feature_vectors() + + # Add DataFrames + dataset['dataframes'] = self.to_dataframes() + + return dataset + + def export_to_files(self, output_dir: str, format: str = 'csv') -> Dict[str, str]: + """ + Export data to files for ML workflows. 
+ + Args: + output_dir: Directory to save files + format: Export format ('csv', 'json', 'npz', 'pickle') + + Returns: + dict: Mapping of data type to file path + """ + output_path = Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + + model_name = self.sbml_info.get('model_id', 'sbml_model') + exported_files = {} + + if format == 'csv': + # Export DataFrames as CSV + dataframes = self.to_dataframes() + for name, df in dataframes.items(): + file_path = output_path / f"{model_name}_{name}.csv" + df.to_csv(file_path, index=False) + exported_files[name] = str(file_path) + + # Export matrices as CSV + if self.species and self.reactions: + S, species_ids, reaction_ids = self.get_stoichiometry_matrix() + A, _ = self.get_adjacency_matrix() + + # Stoichiometry matrix with labels + S_df = pd.DataFrame(S, index=species_ids, columns=reaction_ids) + S_file = output_path / f"{model_name}_stoichiometry.csv" + S_df.to_csv(S_file) + exported_files['stoichiometry'] = str(S_file) + + # Adjacency matrix with labels + A_df = pd.DataFrame(A, index=species_ids, columns=species_ids) + A_file = output_path / f"{model_name}_adjacency.csv" + A_df.to_csv(A_file) + exported_files['adjacency'] = str(A_file) + + elif format == 'json': + # Export as JSON + ml_dataset = self.get_ml_dataset() + # Convert numpy arrays to lists for JSON serialization + def convert_numpy(obj): + if isinstance(obj, np.ndarray): + return obj.tolist() + elif isinstance(obj, pd.DataFrame): + return obj.to_dict('records') + return obj + + json_data = {} + for key, value in ml_dataset.items(): + if key == 'dataframes': + json_data[key] = {k: v.to_dict('records') for k, v in value.items()} + elif key == 'matrices': + json_data[key] = {k: convert_numpy(v) for k, v in value.items()} + elif key == 'features': + json_data[key] = {k: convert_numpy(v) for k, v in value.items()} + else: + json_data[key] = value + + json_file = output_path / f"{model_name}_ml_data.json" + with open(json_file, 'w') as f: + 
json.dump(json_data, f, indent=2) + exported_files['ml_data'] = str(json_file) + + elif format == 'npz': + # Export as NumPy compressed format + ml_dataset = self.get_ml_dataset() + arrays_to_save = {} + + # Flatten all numpy arrays with descriptive names + if 'matrices' in ml_dataset: + arrays_to_save['stoichiometry_matrix'] = ml_dataset['matrices']['stoichiometry'] + arrays_to_save['adjacency_matrix'] = ml_dataset['matrices']['adjacency'] + + if 'features' in ml_dataset: + for feat_name, feat_array in ml_dataset['features'].items(): + arrays_to_save[f'{feat_name}_features'] = feat_array + + npz_file = output_path / f"{model_name}_ml_data.npz" + np.savez_compressed(npz_file, **arrays_to_save) + exported_files['ml_data'] = str(npz_file) + + return exported_files diff --git a/tests/sbml_parser/test_main_parser.py b/tests/sbml_parser/test_main_parser.py new file mode 100644 index 0000000..70edd1a --- /dev/null +++ b/tests/sbml_parser/test_main_parser.py @@ -0,0 +1,315 @@ +""" +Tests for the main SBML parser functionality. 
+""" +import pytest +import tempfile +import os +from simba_ml.sbml_parser.main_parser import MainSBMLParser, SBMLParsingError, UnsupportedSBMLVersionError + + +class TestMainSBMLParser: + """Test the main SBML parser functionality.""" + + def test_init(self): + """Test parser initialization.""" + parser = MainSBMLParser("test_file.xml") + assert parser.file_path == "test_file.xml" + assert parser.level is None + assert parser.version is None + assert parser.model is None + + def test_supported_versions(self): + """Test that supported versions are correctly defined.""" + expected_versions = { + (2, 4): "level_2.parser", + (2, 5): "level_2.parser", + (3, 1): "level_3.parser", + (3, 2): "level_3.parser" + } + assert MainSBMLParser.SUPPORTED_VERSIONS == expected_versions + + def test_get_parser_module_supported(self): + """Test getting parser module for supported versions.""" + parser = MainSBMLParser("test.xml") + + # Test Level 2 versions + assert parser.get_parser_module(2, 4) == "level_2.parser" + assert parser.get_parser_module(2, 5) == "level_2.parser" + + # Test Level 3 versions + assert parser.get_parser_module(3, 1) == "level_3.parser" + assert parser.get_parser_module(3, 2) == "level_3.parser" + + def test_get_parser_module_unsupported(self): + """Test error for unsupported versions.""" + parser = MainSBMLParser("test.xml") + + with pytest.raises(UnsupportedSBMLVersionError) as excinfo: + parser.get_parser_module(1, 2) + + assert "Level 1 Version 2 is not supported" in str(excinfo.value) + assert "Supported versions:" in str(excinfo.value) + + def test_detect_version_and_level_invalid_file(self): + """Test error handling for invalid file.""" + parser = MainSBMLParser("nonexistent_file.xml") + + with pytest.raises(SBMLParsingError): + parser.detect_version_and_level() + + def test_validate_ode_model_no_reactions(self): + """Test validation warning for models without reactions.""" + # Create a minimal SBML model without reactions for testing + sbml_content = 
''' + + + + + + + + + +''' + + with tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False) as f: + f.write(sbml_content) + f.flush() + + try: + parser = MainSBMLParser(f.name) + level, version, model = parser.detect_version_and_level() + + # Should not raise error, just log warning + parser.validate_ode_model(model) + + assert level == 3 + assert version == 1 + assert model is not None + + finally: + os.unlink(f.name) + + def test_detect_version_level_with_valid_sbml(self): + """Test detection with valid SBML content.""" + sbml_content = ''' + + + + + + + + + + + + + + + + + + + + + + k + A + + + + + + + + + +''' + + with tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False) as f: + f.write(sbml_content) + f.flush() + + try: + parser = MainSBMLParser(f.name) + level, version, model = parser.detect_version_and_level() + + assert level == 3 + assert version == 2 + assert model is not None + assert parser.level == 3 + assert parser.version == 2 + assert parser.model is not None + + finally: + os.unlink(f.name) + + +class TestSBMLParsingIntegration: + """Integration tests for SBML parsing.""" + + def create_test_sbml_file(self, level, version, content_additions=""): + """Helper to create test SBML files.""" + base_content = f''' + + + + + + + + + + + + + + + + + + k + A + + + {'' if level == 3 else ''} + + + + {content_additions} + +''' + + temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False) + temp_file.write(base_content) + temp_file.flush() + temp_file.close() + return temp_file.name + + def test_level_2_version_4_parsing(self): + """Test parsing Level 2 Version 4 files.""" + test_file = self.create_test_sbml_file(2, 4) + + try: + parser = MainSBMLParser(test_file) + result = parser.process() + + assert isinstance(result, dict) + assert 'sbml_info' in result + assert result['sbml_info']['level'] == 2 + assert result['sbml_info']['version'] == 4 + + finally: + os.unlink(test_file) + + def 
test_level_2_version_5_parsing(self): + """Test parsing Level 2 Version 5 files.""" + test_file = self.create_test_sbml_file(2, 5) + + try: + parser = MainSBMLParser(test_file) + result = parser.process() + + assert isinstance(result, dict) + assert 'sbml_info' in result + assert result['sbml_info']['level'] == 2 + assert result['sbml_info']['version'] == 5 + + finally: + os.unlink(test_file) + + def test_level_3_version_1_parsing(self): + """Test parsing Level 3 Version 1 files.""" + test_file = self.create_test_sbml_file(3, 1) + + try: + parser = MainSBMLParser(test_file) + result = parser.process() + + assert isinstance(result, dict) + assert 'sbml_info' in result + assert result['sbml_info']['level'] == 3 + assert result['sbml_info']['version'] == 1 + + finally: + os.unlink(test_file) + + def test_level_3_version_2_parsing(self): + """Test parsing Level 3 Version 2 files.""" + test_file = self.create_test_sbml_file(3, 2) + + try: + parser = MainSBMLParser(test_file) + result = parser.process() + + assert isinstance(result, dict) + assert 'sbml_info' in result + assert result['sbml_info']['level'] == 3 + assert result['sbml_info']['version'] == 2 + + finally: + os.unlink(test_file) + + def test_unsupported_version_raises_error(self): + """Test that unsupported versions raise appropriate errors.""" + # Create a Level 1 SBML file (unsupported) + sbml_content = ''' + + + + + + +''' + + with tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False) as f: + f.write(sbml_content) + f.flush() + + try: + parser = MainSBMLParser(f.name) + + with pytest.raises(UnsupportedSBMLVersionError) as excinfo: + parser.process() + + assert "Level 1 Version 2 is not supported" in str(excinfo.value) + + finally: + os.unlink(f.name) + + def test_malformed_sbml_raises_error(self): + """Test that malformed SBML raises parsing error.""" + malformed_content = ''' + + + + + + +''' + + with tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False) as f: + 
f.write(malformed_content) + f.flush() + + try: + parser = MainSBMLParser(f.name) + # Should not raise error during parsing, but might log warnings + result = parser.process() + assert isinstance(result, dict) + + finally: + os.unlink(f.name) + + def teardown_method(self): + """Clean up any temporary files.""" + pass + + +if __name__ == "__main__": + pytest.main([__file__]) \ No newline at end of file From 378a704ea299e9480e0c36d894fc2e55cad0498e Mon Sep 17 00:00:00 2001 From: YagmurSimsekk Date: Sat, 30 Aug 2025 19:17:12 +0200 Subject: [PATCH 2/7] ODE detection change & cleanup: - Updated validation logic and type code check - Fixed _get_rule_type_name() mapping - Added comprehensive ODE detection helpers - Added rate rule support and proper method naming - Added rate rule execution in ODE derivatives - fix trailing whitespace and EOF newlines --- .coveragerc | 1 - .darglint | 1 - .gitignore | 3 +- .readthedocs.yml | 2 +- .vscode/settings.json | 2 +- Dockerfile | 2 +- MANIFEST.in | 2 +- Makefile | 2 +- .../Models/time_series_models.rst | 28 +-- .../Models/transfer_learning_models.rst | 2 +- .../Pipelines/index.rst | 4 +- .../Pipelines/mixed_data_pipeline.rst | 2 +- .../Pipelines/mixed_data_pipeline.toml | 2 +- .../Pipelines/synthetic_data_pipeline.rst | 2 +- .../Pipelines/synthetic_data_pipeline.toml | 2 +- .../Pipelines/transfer_learning_pipeline.rst | 2 +- .../Pipelines/transfer_learning_pipeline.toml | 2 +- docs/source/Usage/Machine-Learning/index.rst | 4 +- .../Simulation/create_complex_config.rst | 2 +- docs/source/Usage/Simulation/index.rst | 2 +- .../Usage/Simulation/run_data_generation.rst | 4 +- docs/source/Usage/cli.rst | 2 +- docs/source/about/acknowledgements.rst | 4 +- docs/source/about/authors.rst | 2 +- docs/source/about/cite.rst | 2 +- docs/source/api.rst | 2 +- docs/source/contents.rst | 4 +- docs/source/contributing.rst | 4 +- docs/source/index.rst | 2 +- docs/source/installation.rst | 4 +- docs/source/quickstart.rst | 2 +- docs_requirements.txt 
| 2 +- pylintrc | 2 +- pyproject.toml | 2 +- setup.cfg | 2 +- setup.py | 5 + simba_ml/cli/__main__.py | 30 ++- simba_ml/sbml_parser/__init__.py | 6 +- simba_ml/sbml_parser/biomodels_api.py | 203 +++++++--------- simba_ml/sbml_parser/level_2/parser.py | 97 +++++--- simba_ml/sbml_parser/level_3/parser.py | 125 ++++++---- simba_ml/sbml_parser/main_parser.py | 84 ++++--- simba_ml/sbml_parser/ml_exporter.py | 230 +++++++++--------- simba_ml/simulation/system_model/__init__.py | 3 +- .../conf/steady_state_pipeline_test_conf.toml | 2 +- .../test_data/real/SteadyState_0.csv | 2 +- .../test_data/real/SteadyState_1.csv | 2 +- .../test_data/real/SteadyState_2.csv | 2 +- .../test_data/real/SteadyState_3.csv | 2 +- .../test_data/real/SteadyState_4.csv | 2 +- .../test_data/simulated/SteadyState_0.csv | 2 +- .../test_data/simulated/SteadyState_1.csv | 2 +- .../test_data/simulated/SteadyState_2.csv | 2 +- .../test_data/simulated/SteadyState_3.csv | 2 +- .../test_data/simulated/SteadyState_4.csv | 2 +- .../conf/mixed_data_pipeline_export.toml | 2 +- .../pipeline_test_conf_plugins_normalize.toml | 2 - .../conf/synthetic_data_pipeline_export.toml | 2 +- .../synthetic_data_pipeline_test_conf.toml | 2 +- ...tic_data_pipeline_test_conf_3_species.toml | 2 - .../conf/time_series_pipeline_test_conf.toml | 2 +- .../conf/transfer_learning_pipeline.toml | 2 +- .../transfer_learning_pipeline_3_spec.toml | 2 +- .../transfer_learning_pipeline_export.toml | 2 +- .../conf/transfer_learning_pipeline_test.toml | 2 +- .../test_data/num_species_1/real/.gitignore | 2 +- .../test_data/num_species_1/real/SIR_0.csv | 2 +- .../test_data/num_species_1/real/SIR_1.csv | 2 +- .../test_data/num_species_1/real/SIR_2.csv | 2 +- .../test_data/num_species_1/real/SIR_3.csv | 2 +- .../test_data/num_species_1/real/SIR_4.csv | 2 +- .../num_species_1/simulated/.gitignore | 2 +- .../num_species_1/simulated/SIR_0.csv | 2 +- .../num_species_1/simulated/SIR_1.csv | 2 +- .../num_species_1/simulated/SIR_2.csv | 2 +- 
.../num_species_1/simulated/SIR_3.csv | 2 +- .../num_species_1/simulated/SIR_4.csv | 2 +- .../test_data_plugins/real/.gitignore | 2 +- .../test_data_plugins/real/SIR_0.csv | 99 ++++---- .../test_data_plugins/real/SIR_1.csv | 99 ++++---- .../test_data_plugins/real/SIR_2.csv | 99 ++++---- .../test_data_plugins/real/SIR_3.csv | 99 ++++---- .../test_data_plugins/real/SIR_4.csv | 99 ++++---- .../test_data_plugins/simulated/.gitignore | 2 +- .../test_data_plugins/simulated/SIR_0.csv | 100 ++++---- .../test_data_plugins/simulated/SIR_1.csv | 100 ++++---- .../test_data_plugins/simulated/SIR_2.csv | 100 ++++---- .../test_data_plugins/simulated/SIR_3.csv | 100 ++++---- .../test_data_plugins/simulated/SIR_4.csv | 100 ++++---- tests/sbml_parser/test_main_parser.py | 88 +++---- 90 files changed, 1064 insertions(+), 981 deletions(-) diff --git a/.coveragerc b/.coveragerc index 91ac824..1e453ef 100644 --- a/.coveragerc +++ b/.coveragerc @@ -8,4 +8,3 @@ exclude_lines = if __name__ == .__main__.: pragma: no cover show_missing = True - diff --git a/.darglint b/.darglint index 02451df..71c2a45 100644 --- a/.darglint +++ b/.darglint @@ -1,4 +1,3 @@ [darglint] ignore=DAR402 docstring_style=google - diff --git a/.gitignore b/.gitignore index b08b449..67f065b 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,5 @@ dev.py wandb/ lightning_logs/ *.ipynb -man \ No newline at end of file +man +.pre-commit-config.yaml diff --git a/.readthedocs.yml b/.readthedocs.yml index 3bc160b..f652017 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -13,7 +13,7 @@ build: sphinx: configuration: docs/source/conf.py - + python: install: - method: pip diff --git a/.vscode/settings.json b/.vscode/settings.json index a26e06c..3f94007 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -41,4 +41,4 @@ "--convention=google", ], "python.linting.mypyEnabled": true, -} \ No newline at end of file +} diff --git a/Dockerfile b/Dockerfile index 282ed45..df83c73 100644 --- a/Dockerfile +++ 
b/Dockerfile @@ -16,4 +16,4 @@ RUN pip install -r /tmp/dev_requirements.txt RUN pip install -r /tmp/docs_requirements.txt RUN rm /tmp/requirements.txt RUN rm /tmp/dev_requirements.txt -RUN rm /tmp/docs_requirements.txt \ No newline at end of file +RUN rm /tmp/docs_requirements.txt diff --git a/MANIFEST.in b/MANIFEST.in index 3df7d31..87e0906 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,2 @@ include versioneer.py -include simba_ml/_version.py \ No newline at end of file +include simba_ml/_version.py diff --git a/Makefile b/Makefile index 0e16606..96925b6 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ lint: simba_ml pycodestyle --max-line-length=88 --ignore E203,W503 --select W504 simba_ml pylint simba_ml pydocstyle --convention=google simba_ml - sourcery review simba_ml --check + sourcery review simba_ml --check mypy --pretty simba_ml/ --disable-error-code import --disable-error-code no-any-return --strict find simba_ml ! -iwholename "simba_ml\/\_version\.py" -name "*.py" | xargs darglint -v 2 black simba_ml --check --exclude _version.py diff --git a/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Models/time_series_models.rst b/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Models/time_series_models.rst index f75eaf7..f74e143 100644 --- a/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Models/time_series_models.rst +++ b/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Models/time_series_models.rst @@ -41,45 +41,45 @@ The following code show how to integrate a model, which always predicts zero: >>> import dataclasses >>> import numpy as np >>> import numpy.typing as npt ->>> +>>> >>> from simba_ml.prediction.time_series.models import model >>> from simba_ml.prediction.time_series.models import factory ->>> ->>> +>>> +>>> >>> @dataclasses.dataclass ... class ZeroPredictorConfig(model.ModelConfig): ... """Defines the configuration for the DenseNeuralNetwork.""" ... name: str = "Zero Predictor" -... -... +... 
+... >>> class ZeroPredictor(model.Model): ... """Defines a model, which predicts the average of the train data.""" -... +... ... def __init__(self, input_length: int, output_length: int, config: ZeroPredictorConfig): ... """Inits the `AveragePredictor`. -... +... ... Args: ... input_length: the length of the input data. ... output_length: the length of the output data. ... config: the config for the model ... """ ... super().__init__(input_length, output_length, config) -... +... ... def set_seed(self, seed: int) -> None: -... """Sets the seed for the model. For this model, this is not required.""" +... """Sets the seed for the model. For this model, this is not required.""" ... pass -... +... ... def train(self, train: list[npt.NDArray[np.float64]], val: list[npt.NDArray[np.float64]]) -> None: ... pass -... +... ... def predict(self, data: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]: ... self.validate_prediction_input(data) ... return np.full((data.shape[0], self.output_length, data.shape[2]), 0.0) -... -... +... +... >>> def register() -> None: ... factory.register( ... "ZeroPredictor", ... ZeroPredictorConfig, ... ZeroPredictor -... ) \ No newline at end of file +... ) diff --git a/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Models/transfer_learning_models.rst b/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Models/transfer_learning_models.rst index 77e3c08..c351141 100644 --- a/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Models/transfer_learning_models.rst +++ b/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Models/transfer_learning_models.rst @@ -24,4 +24,4 @@ Include Own Model for Transfer Learning Besides the provided models, SimbaML allows for the effortless integration of any other machine learning models, for example, PyTorch Lightning and Keras .. 
note:: - Before applying your own model to the transfer learning pipeline, make sure that the model's weights are not reset when the train() function is called the second time. This is, for example, the case for Scikit-learn models. \ No newline at end of file + Before applying your own model to the transfer learning pipeline, make sure that the model's weights are not reset when the train() function is called the second time. This is, for example, the case for Scikit-learn models. diff --git a/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/index.rst b/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/index.rst index 1d6a8c8..6675229 100644 --- a/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/index.rst +++ b/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/index.rst @@ -1,6 +1,6 @@ Time-Series Prediction Pipelines ================================ - + To enable scalable and easy-to-run machine learning experiments on time-series data, SimbaML offers multiple pipelines covering data pre-processing, training, and evaluation of ML models. .. 
toctree:: @@ -8,4 +8,4 @@ synthetic_data_pipeline mixed_data_pipeline - transfer_learning_pipeline \ No newline at end of file + transfer_learning_pipeline diff --git a/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/mixed_data_pipeline.rst b/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/mixed_data_pipeline.rst index 5133fd9..ad49bc6 100644 --- a/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/mixed_data_pipeline.rst +++ b/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/mixed_data_pipeline.rst @@ -15,4 +15,4 @@ All provided machine learning pipelines of SimbaML can be configured based on co Start Mixed Data Pipeline -------------------------- - $ simba_ml start-prediction mixed_data --config-path mixed_data_pipeline.toml \ No newline at end of file + $ simba_ml start-prediction mixed_data --config-path mixed_data_pipeline.toml diff --git a/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/mixed_data_pipeline.toml b/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/mixed_data_pipeline.toml index a835a47..145f7ad 100644 --- a/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/mixed_data_pipeline.toml +++ b/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/mixed_data_pipeline.toml @@ -93,4 +93,4 @@ plugins = [ # make sure to specify the right project and entity # [logging] # project = "your-wandb-project" -# entity = "your-wandb-entity" \ No newline at end of file +# entity = "your-wandb-entity" diff --git a/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/synthetic_data_pipeline.rst b/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/synthetic_data_pipeline.rst index 098ffc0..b5f124c 100644 --- a/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/synthetic_data_pipeline.rst +++ 
b/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/synthetic_data_pipeline.rst @@ -17,4 +17,4 @@ All provided machine learning pipelines of SimbaML can be configured based on co Start Synthetic Data Pipeline ----------------------------- - $ simba_ml start-prediction synthetic_data --config-path synthetic_data_pipeline.toml \ No newline at end of file + $ simba_ml start-prediction synthetic_data --config-path synthetic_data_pipeline.toml diff --git a/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/synthetic_data_pipeline.toml b/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/synthetic_data_pipeline.toml index 05129bc..a10fd94 100644 --- a/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/synthetic_data_pipeline.toml +++ b/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/synthetic_data_pipeline.toml @@ -91,4 +91,4 @@ plugins = [ # make sure to specify the right project and entity # [logging] # project = "your-wandb-project" -# entity = "your-wandb-entity" \ No newline at end of file +# entity = "your-wandb-entity" diff --git a/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/transfer_learning_pipeline.rst b/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/transfer_learning_pipeline.rst index b59e8d6..5d59973 100644 --- a/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/transfer_learning_pipeline.rst +++ b/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/transfer_learning_pipeline.rst @@ -17,4 +17,4 @@ This way, users can change the models that are going to be trained, their hyperp Start Pipeline -------------- - $ simba_ml start-prediction transfer_learning --config-path transfer_learning_pipeline.toml \ No newline at end of file + $ simba_ml start-prediction transfer_learning --config-path transfer_learning_pipeline.toml diff --git 
a/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/transfer_learning_pipeline.toml b/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/transfer_learning_pipeline.toml index 5c79ec7..659ab22 100644 --- a/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/transfer_learning_pipeline.toml +++ b/docs/source/Usage/Machine-Learning/Time-Series-Prediction/Pipelines/transfer_learning_pipeline.toml @@ -46,4 +46,4 @@ plugins = [ # make sure to specify the right project and entity # [logging] # project = "your-wandb-project" -# entity = "your-wandb-entity" \ No newline at end of file +# entity = "your-wandb-entity" diff --git a/docs/source/Usage/Machine-Learning/index.rst b/docs/source/Usage/Machine-Learning/index.rst index 0df8cff..7a53a59 100644 --- a/docs/source/Usage/Machine-Learning/index.rst +++ b/docs/source/Usage/Machine-Learning/index.rst @@ -24,10 +24,10 @@ Pipelines .. toctree:: :maxdepth: 2 - + Time-Series-Prediction/Pipelines/index Steady State Prediction ----------------------- -Coming soon! \ No newline at end of file +Coming soon! diff --git a/docs/source/Usage/Simulation/create_complex_config.rst b/docs/source/Usage/Simulation/create_complex_config.rst index 00d850e..f6d92b8 100644 --- a/docs/source/Usage/Simulation/create_complex_config.rst +++ b/docs/source/Usage/Simulation/create_complex_config.rst @@ -46,4 +46,4 @@ Create the SystemModel object -------------------------------- Save it in a variable called ‘sm’. 
->>> sm = system_model.SystemModel(name, specieses, kinetic_parameters, deriv=deriv, deriv_noiser=derivative_noiser, noiser=noiser) \ No newline at end of file +>>> sm = system_model.SystemModel(name, specieses, kinetic_parameters, deriv=deriv, deriv_noiser=derivative_noiser, noiser=noiser) diff --git a/docs/source/Usage/Simulation/index.rst b/docs/source/Usage/Simulation/index.rst index 2dd9625..624edc8 100644 --- a/docs/source/Usage/Simulation/index.rst +++ b/docs/source/Usage/Simulation/index.rst @@ -9,4 +9,4 @@ data-generation with SimbaML based on the required configuration files. create_config create_complex_config - run_data_generation \ No newline at end of file + run_data_generation diff --git a/docs/source/Usage/Simulation/run_data_generation.rst b/docs/source/Usage/Simulation/run_data_generation.rst index 3036b11..39219ad 100644 --- a/docs/source/Usage/Simulation/run_data_generation.rst +++ b/docs/source/Usage/Simulation/run_data_generation.rst @@ -5,7 +5,7 @@ To run the data generation, you need execute the following command: $ simba_ml generate-data [generator] --config-module [config_module] --output-dir [output_dir] -The generator is the name of the generator to use. +The generator is the name of the generator to use. Run `simba_ml generate-data --help` to see the list of available generators. The config_module is the path of the module that contains the `SystemModel` for the generator. -The output_dir is the directory where the generated data will be stored. \ No newline at end of file +The output_dir is the directory where the generated data will be stored. 
diff --git a/docs/source/Usage/cli.rst b/docs/source/Usage/cli.rst index 9d97696..afcef5b 100644 --- a/docs/source/Usage/cli.rst +++ b/docs/source/Usage/cli.rst @@ -8,5 +8,5 @@ To get a list of all available commands, run: $ simba_ml --help To get help on a specific command, run: - + $ simba_ml --help diff --git a/docs/source/about/acknowledgements.rst b/docs/source/about/acknowledgements.rst index 7018e8b..4eec03d 100644 --- a/docs/source/about/acknowledgements.rst +++ b/docs/source/about/acknowledgements.rst @@ -1,9 +1,9 @@ Acknowledgements ================ -Thanks to +Thanks to - Katharina Baum - Pascal Iversen - Simon Witzke -- Bernhard Renard \ No newline at end of file +- Bernhard Renard diff --git a/docs/source/about/authors.rst b/docs/source/about/authors.rst index 26cd3f9..29616fd 100644 --- a/docs/source/about/authors.rst +++ b/docs/source/about/authors.rst @@ -4,4 +4,4 @@ Authors - Lukas Drews - Benedict Heyder - Maximilian Kleissl -- Julian Zabbarov \ No newline at end of file +- Julian Zabbarov diff --git a/docs/source/about/cite.rst b/docs/source/about/cite.rst index 17bff02..a2beb59 100644 --- a/docs/source/about/cite.rst +++ b/docs/source/about/cite.rst @@ -4,7 +4,7 @@ Reference When using SimbaML in a scientific publication, please include the following references to relevant papers. .. code-block:: latex - + @inproceedings{DBLP:conf/iclr/KleisslDHZIWRB23, author = {Maximilian Kleissl and Lukas Drews and diff --git a/docs/source/api.rst b/docs/source/api.rst index da68cde..bdcc000 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -7,4 +7,4 @@ API :template: custom-module-template.rst :recursive: - simba_ml \ No newline at end of file + simba_ml diff --git a/docs/source/contents.rst b/docs/source/contents.rst index fad534a..ea99c64 100644 --- a/docs/source/contents.rst +++ b/docs/source/contents.rst @@ -27,7 +27,7 @@ .. 
toctree:: :maxdepth: 1 :caption: Contributing - + contributing @@ -37,4 +37,4 @@ about/authors about/acknowledgements - about/cite \ No newline at end of file + about/cite diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst index fd89b6e..c67d890 100644 --- a/docs/source/contributing.rst +++ b/docs/source/contributing.rst @@ -21,7 +21,7 @@ Test the installation by running tests and lints: # or make check # This will run all tests and lints - + Coding Standards ---------------- @@ -34,4 +34,4 @@ Coding Standards - Code should be tested and be fully covered by the tests -- Docstrings should be written according to the `Google Python Style Guide `_. \ No newline at end of file +- Docstrings should be written according to the `Google Python Style Guide `_. diff --git a/docs/source/index.rst b/docs/source/index.rst index 9422cd5..4f454aa 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -39,4 +39,4 @@ For more detailed installation instructions and requirements, see :ref:`installa For an example usage of SimbaML, see :ref:`quickstart`. -.. include:: about/cite.rst \ No newline at end of file +.. include:: about/cite.rst diff --git a/docs/source/installation.rst b/docs/source/installation.rst index f410183..32b3e2d 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -6,7 +6,7 @@ Installation SimbaML requires Python 3.10 or newer and can be installed via pip: .. 
code-block:: bash - + pip install simba_ml Check if installation was successfull by running: @@ -29,4 +29,4 @@ pip install pytorch-lightning>=1.9.0 pip install tensorflow>=2.10.0; platform_machine != 'arm64' -For further details on how to install Tensorflow on ARM-based MacOS devices, see: https://developer.apple.com/metal/tensorflow-plugin/ \ No newline at end of file +For further details on how to install Tensorflow on ARM-based MacOS devices, see: https://developer.apple.com/metal/tensorflow-plugin/ diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst index 4f90055..5673462 100644 --- a/docs/source/quickstart.rst +++ b/docs/source/quickstart.rst @@ -134,4 +134,4 @@ You find an examplary config for the synthetic data pipeline under :ref:`synthet .. code-block:: python from simba_ml.prediction.time_series.pipelines import synthetic_data_pipeline - result_df = synthetic_data_pipeline.main("ml_config.toml") \ No newline at end of file + result_df = synthetic_data_pipeline.main("ml_config.toml") diff --git a/docs_requirements.txt b/docs_requirements.txt index 0bc9995..b1942b7 100644 --- a/docs_requirements.txt +++ b/docs_requirements.txt @@ -3,4 +3,4 @@ furo>=2022.9.29 sphinx_design>=0.3.0 sphinx_copybutton>=0.5.1 setuptools==59.8.0 -click-man==0.4.1 \ No newline at end of file +click-man==0.4.1 diff --git a/pylintrc b/pylintrc index e651583..4082afb 100644 --- a/pylintrc +++ b/pylintrc @@ -150,7 +150,7 @@ disable=invalid-name, duplicate-code, too-many-instance-attributes, too-many-arguments - + # Enable the message, report, category or checker with the given id(s). 
You can # either give multiple identifier separated by comma (,) or put this option # multiple time (only on the command line, not in the configuration file where diff --git a/pyproject.toml b/pyproject.toml index 139bf57..dfdda10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,3 @@ [build-system] requires = ["setuptools>=42", "wheel", "versioneer-518"] -build-backend = "setuptools.build_meta" \ No newline at end of file +build-backend = "setuptools.build_meta" diff --git a/setup.cfg b/setup.cfg index cfd39f6..0141e42 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,4 +36,4 @@ requires = ["setuptools>=42", "wheel==0.30.0", "versioneer-518"] build-backend = "setuptools.build_meta" [metadata] -licence_file = LICENSE.txt \ No newline at end of file +licence_file = LICENSE.txt diff --git a/setup.py b/setup.py index 7b317dd..a7881be 100644 --- a/setup.py +++ b/setup.py @@ -35,6 +35,10 @@ "dacite", "tomli", "wandb", + "click", + "requests", + "python-libsbml", + "joblib", ], keywords=[ "python", @@ -49,6 +53,7 @@ entry_points={ "console_scripts": [ "simba_ml = simba_ml.cli.__main__:main", + "simba-ml = simba_ml.cli.main:main", ], }, license_files=("LICENSE.txt",), diff --git a/simba_ml/cli/__main__.py b/simba_ml/cli/__main__.py index 78caa0e..14f51b0 100644 --- a/simba_ml/cli/__main__.py +++ b/simba_ml/cli/__main__.py @@ -1,23 +1,35 @@ -"""This script defines a command line interface (CLI) for the SimbaML.""" +"""This script defines the legacy CLI for SimbaML (backward compatibility). + +For the new modern CLI, use: simba-ml +For legacy compatibility, use: python -m simba_ml.cli +""" import click from simba_ml.cli import generate_data from simba_ml.cli import start_prediction from simba_ml.cli.problem_viewer import run_problem_viewer -from simba_ml.cli import parse_sbml -from simba_ml.cli import biomodels +from simba_ml.cli import legacy_adapters @click.group() def main() -> None: - """CLI for SimbaML.""" + """CLI for SimbaML (Legacy Interface). 
+ + This is the legacy CLI interface. For the modern interface with better UX, use: + + simba-ml --help + """ + +# Legacy commands - keep original names for backward compatibility +main.add_command(generate_data.generate_data, name="generate-data") +main.add_command(start_prediction.start_prediction, name="start-prediction") +main.add_command(run_problem_viewer.run_problem_viewer, name="run-problem-viewer") -main.add_command(generate_data.generate_data) -main.add_command(start_prediction.start_prediction) -main.add_command(run_problem_viewer.run_problem_viewer) -main.add_command(parse_sbml.parse_sbml) -main.add_command(biomodels.biomodels) +# New commands - use modern implementations but keep legacy names +main.add_command(legacy_adapters.parse_sbml, name="parse-sbml") +main.add_command(legacy_adapters.biomodels, name="biomodels") +main.add_command(legacy_adapters.steady_state, name="steady-state") if __name__ == "__main__": diff --git a/simba_ml/sbml_parser/__init__.py b/simba_ml/sbml_parser/__init__.py index bf2f6e2..6fe91c5 100644 --- a/simba_ml/sbml_parser/__init__.py +++ b/simba_ml/sbml_parser/__init__.py @@ -6,11 +6,11 @@ """ from .main_parser import MainSBMLParser, SBMLParsingError, UnsupportedSBMLVersionError -from .ml_exporter import SBMLMLExporter +from .ml_exporter import SBMLExporter __all__ = [ 'MainSBMLParser', - 'SBMLMLExporter', + 'SBMLExporter', 'SBMLParsingError', 'UnsupportedSBMLVersionError' -] \ No newline at end of file +] diff --git a/simba_ml/sbml_parser/biomodels_api.py b/simba_ml/sbml_parser/biomodels_api.py index 0706221..c217732 100644 --- a/simba_ml/sbml_parser/biomodels_api.py +++ b/simba_ml/sbml_parser/biomodels_api.py @@ -13,20 +13,20 @@ class BioModelsAPI: """Client for BioModels Database REST API.""" - + BASE_URL = "https://www.ebi.ac.uk/biomodels" - - def search_models(self, query: str, limit: int = 10, offset: int = 0) -> Dict[str, Any]: + + def search_models(self, query: str, limit: int = 10, offset: int = 0) -> List[Dict[str, 
Any]]: """ Search for models in BioModels Database. - + Args: query: Search query (model name, author, keywords) limit: Maximum number of results to return (API minimum is 10) offset: Number of results to skip - + Returns: - Dictionary containing search results + List of model dictionaries """ url = f"{self.BASE_URL}/search" params = { @@ -35,125 +35,108 @@ def search_models(self, query: str, limit: int = 10, offset: int = 0) -> Dict[st 'offset': offset, 'format': 'json' } - + response = requests.get(url, params=params) response.raise_for_status() - return response.json() - - def get_model_files(self, model_id: str) -> Dict[str, Any]: + + data = response.json() + + # Extract models from the response + if isinstance(data, dict) and 'models' in data: + models = data['models'][:limit] # Limit to requested number + # Standardize the format + return [ + { + 'model_id': model.get('id', ''), + 'name': model.get('name', ''), + 'format': model.get('format', 'SBML'), + 'submitter': model.get('submitter', ''), + 'submission_date': model.get('submissionDate', ''), + 'last_modified': model.get('lastModified', ''), + 'url': model.get('url', '') + } + for model in models + ] + else: + return [] + + def get_model_info(self, model_id: str) -> Dict[str, Any]: """ - Get information about files available for a model. - + Get information about a specific model. 
+ Args: - model_id: Model identifier (e.g., "BIOMD0000000012", "Malkov2020") - + model_id: Model identifier (e.g., "BIOMD0000000012", "MODEL1312040000") + Returns: - Dictionary with file information + Dictionary with model information """ - url = f"{self.BASE_URL}/model/files/{model_id}" - params = {'format': 'json'} - - response = requests.get(url, params=params) - if response.status_code == 404: - raise ValueError(f"Model {model_id} not found in BioModels Database") - response.raise_for_status() - - return response.json() - - def download_model(self, model_id: str, output_dir: Optional[str] = None, - filename: Optional[str] = None) -> str: + # Try to get model information from the model endpoint + url = f"{self.BASE_URL}/{model_id}?format=json" + + try: + response = requests.get(url) + if response.status_code == 404: + raise ValueError(f"Model {model_id} not found in BioModels Database") + response.raise_for_status() + return response.json() + except Exception: + # Fallback: basic info from model ID + return { + 'model_id': model_id, + 'name': f'Model {model_id}', + 'format': 'SBML' + } + + def download_model(self, model_id: str, output_dir: Optional[str] = None) -> str: """ Download SBML model file. - + Args: - model_id: Model identifier + model_id: Model identifier (e.g., "BIOMD0000000505") output_dir: Directory to save the model. If None, saves to current directory. - filename: Specific filename to download. If None, downloads the main SBML file. 
- + Returns: Path to downloaded file """ # Set output directory output_path = Path(output_dir) if output_dir else Path(".") output_path.mkdir(parents=True, exist_ok=True) - - # Get model file information if filename not specified - if not filename: - model_info = self.get_model_files(model_id) - - # Find SBML file in main files - sbml_files = [f for f in model_info.get('main', []) - if f['name'].endswith(('.xml', '.sbml'))] - - if not sbml_files: - raise ValueError(f"No SBML file found for model {model_id}") - - filename = sbml_files[0]['name'] - - # Download the file + + # Get model info to find the actual filename + try: + model_info = self.get_model_info(model_id) + + # Extract main SBML file name from files section + files = model_info.get('files', {}) + main_files = files.get('main', []) + + if main_files and len(main_files) > 0: + filename = main_files[0].get('name') + if not filename: + # Fallback to standard pattern + filename = f"{model_id}_url.xml" + else: + # Fallback to standard pattern + filename = f"{model_id}_url.xml" + + except Exception: + # If we can't get model info, use standard pattern + filename = f"{model_id}_url.xml" + + # Download the file using BioModels standard download URL download_url = f"{self.BASE_URL}/model/download/{model_id}" params = {'filename': filename} - - print(f"Downloading {model_id}/{filename} from BioModels Database...") - response = requests.get(download_url, params=params) - response.raise_for_status() - - # Save file - output_file = output_path / filename - with open(output_file, 'wb') as f: - f.write(response.content) - - print(f"Downloaded: {output_file}") - return str(output_file) - - def get_model_info(self, model_id: str) -> Dict[str, Any]: - """ - Get detailed information about a model. 
- - Args: - model_id: Model identifier - - Returns: - Dictionary with model information - """ - url = f"{self.BASE_URL}/model/{model_id}" - params = {'format': 'json'} - - response = requests.get(url, params=params) - if response.status_code == 404: - raise ValueError(f"Model {model_id} not found in BioModels Database") - response.raise_for_status() - - return response.json() - - -def download_biomodel(model_id: str, output_dir: Optional[str] = None) -> str: - """ - Convenience function to download a BioModel. - - Args: - model_id: Model identifier (e.g., "BIOMD0000000012", "Malkov2020") - output_dir: Directory to save the model - - Returns: - Path to downloaded SBML file - """ - api = BioModelsAPI() - return api.download_model(model_id, output_dir) - - -def search_biomodels(query: str, limit: int = 10) -> List[Dict[str, Any]]: - """ - Convenience function to search BioModels. - - Args: - query: Search query - limit: Maximum number of results - - Returns: - List of model information - """ - api = BioModelsAPI() - results = api.search_models(query, max(limit, 10)) # API minimum is 10 - models = results.get('models', []) - return models[:limit] # Trim to requested limit + + try: + response = requests.get(download_url, params=params, allow_redirects=True) + response.raise_for_status() + + # Save file with original filename + output_file = output_path / filename + with open(output_file, 'wb') as f: + f.write(response.content) + + return str(output_file) + + except requests.RequestException as e: + raise ValueError(f"Failed to download model {model_id}: {e}") diff --git a/simba_ml/sbml_parser/level_2/parser.py b/simba_ml/sbml_parser/level_2/parser.py index 557c9ae..687c473 100644 --- a/simba_ml/sbml_parser/level_2/parser.py +++ b/simba_ml/sbml_parser/level_2/parser.py @@ -9,7 +9,7 @@ class Parser: Parser for SBML Level 2 models (versions 4 and 5). Focuses on ODE model extraction and conversion. 
""" - + def __init__(self, file_path, level=2, version=None): self.file_path = file_path self.level = level @@ -20,7 +20,7 @@ def __init__(self, file_path, level=2, version=None): def parse(self): """ Parse SBML Level 2 file and extract ODE model components. - + Returns: dict: Parsed model data with species, reactions, parameters, compartments """ @@ -28,12 +28,12 @@ def parse(self): reader = SBMLReader() self.document = reader.readSBML(self.file_path) self.model = self.document.getModel() - + if self.model is None: raise SBMLParsingError("No model found in SBML file") - + logger.info(f"Parsing SBML Level 2 Version {self.version or 'unknown'} file: {self.file_path}") - + parsed_data = { 'sbml_info': self._get_sbml_info(), 'species': self._parse_species(), @@ -43,9 +43,9 @@ def parse(self): 'rules': self._parse_rules(), 'initial_assignments': self._parse_initial_assignments() } - + return parsed_data - + except Exception as e: if isinstance(e, SBMLParsingError): raise @@ -68,7 +68,7 @@ def _get_sbml_info(self): def _parse_species(self): """Parse species information for ODE variables.""" species_list = [] - + for i in range(self.model.getNumSpecies()): species = self.model.getSpecies(i) species_data = { @@ -85,16 +85,16 @@ def _parse_species(self): 'sbo_term': species.getSBOTermID() if species.isSetSBOTerm() else None } species_list.append(species_data) - + return species_list def _parse_reactions(self): """Parse reactions and kinetic laws for ODE system.""" reactions_list = [] - + for i in range(self.model.getNumReactions()): reaction = self.model.getReaction(i) - + # Parse reactants reactants = [] for j in range(reaction.getNumReactants()): @@ -104,8 +104,8 @@ def _parse_reactions(self): 'stoichiometry': reactant.getStoichiometry(), 'constant': reactant.getConstant() if hasattr(reactant, 'getConstant') else True }) - - # Parse products + + # Parse products products = [] for j in range(reaction.getNumProducts()): product = reaction.getProduct(j) @@ -114,7 +114,7 @@ 
def _parse_reactions(self): 'stoichiometry': product.getStoichiometry(), 'constant': product.getConstant() if hasattr(product, 'getConstant') else True }) - + # Parse modifiers modifiers = [] for j in range(reaction.getNumModifiers()): @@ -122,7 +122,7 @@ def _parse_reactions(self): modifiers.append({ 'species': modifier.getSpecies() }) - + # Parse kinetic law kinetic_law = None if reaction.isSetKineticLaw(): @@ -134,7 +134,7 @@ def _parse_reactions(self): 'substance_units': kl.getSubstanceUnits() if kl.isSetSubstanceUnits() else None, 'time_units': kl.getTimeUnits() if kl.isSetTimeUnits() else None } - + reaction_data = { 'id': reaction.getId(), 'name': reaction.getName() if reaction.isSetName() else reaction.getId(), @@ -148,13 +148,13 @@ def _parse_reactions(self): 'sbo_term': reaction.getSBOTermID() if reaction.isSetSBOTerm() else None } reactions_list.append(reaction_data) - + return reactions_list def _parse_parameters(self): """Parse global parameters.""" parameters_list = [] - + for i in range(self.model.getNumParameters()): param = self.model.getParameter(i) param_data = { @@ -167,13 +167,13 @@ def _parse_parameters(self): 'sbo_term': param.getSBOTermID() if param.isSetSBOTerm() else None } parameters_list.append(param_data) - + return parameters_list def _parse_compartments(self): """Parse compartment information.""" compartments_list = [] - + for i in range(self.model.getNumCompartments()): comp = self.model.getCompartment(i) comp_data = { @@ -187,17 +187,17 @@ def _parse_compartments(self): 'sbo_term': comp.getSBOTermID() if comp.isSetSBOTerm() else None } compartments_list.append(comp_data) - + return compartments_list def _parse_rules(self): """Parse assignment, rate, and algebraic rules.""" rules_list = [] - + for i in range(self.model.getNumRules()): rule = self.model.getRule(i) rule_type = rule.getTypeCode() - + rule_data = { 'type': self._get_rule_type_name(rule_type), 'variable': rule.getVariable() if hasattr(rule, 'getVariable') else None, @@ 
-207,13 +207,13 @@ def _parse_rules(self): 'sbo_term': rule.getSBOTermID() if rule.isSetSBOTerm() else None } rules_list.append(rule_data) - + return rules_list def _parse_initial_assignments(self): """Parse initial assignments (Level 2 Version 2+).""" assignments_list = [] - + if hasattr(self.model, 'getNumInitialAssignments'): for i in range(self.model.getNumInitialAssignments()): assignment = self.model.getInitialAssignment(i) @@ -225,13 +225,13 @@ def _parse_initial_assignments(self): 'sbo_term': assignment.getSBOTermID() if assignment.isSetSBOTerm() else None } assignments_list.append(assign_data) - + return assignments_list def _parse_local_parameters(self, kinetic_law): """Parse local parameters within kinetic laws.""" local_params = [] - + for i in range(kinetic_law.getNumParameters()): param = kinetic_law.getParameter(i) param_data = { @@ -243,20 +243,53 @@ def _parse_local_parameters(self, kinetic_law): 'sbo_term': param.getSBOTermID() if param.isSetSBOTerm() else None } local_params.append(param_data) - + return local_params def _get_notes(self, element): """Extract notes/annotations from SBML element.""" if element.isSetNotes(): - return element.getNotesString() + notes_xml = element.getNotesString() + return self._clean_notes_text(notes_xml) return None + def _clean_notes_text(self, notes_xml): + """Extract clean text from SBML notes XML.""" + if not notes_xml: + return None + + try: + import re + from html import unescape + + # Remove XML/HTML tags + clean_text = re.sub(r'<[^>]+>', ' ', notes_xml) + + # Decode HTML entities + clean_text = unescape(clean_text) + + # Clean up whitespace + clean_text = ' '.join(clean_text.split()) + + # Remove common SBML boilerplate + clean_text = re.sub(r'This model is hosted on.*?BioModels Database.*?\.', '', clean_text, flags=re.DOTALL) + clean_text = re.sub(r'To cite BioModels Database.*?models\.', '', clean_text, flags=re.DOTALL) + clean_text = re.sub(r'To the extent possible under law.*?Dedication.*?\.', '', 
clean_text, flags=re.DOTALL) + + # Clean up extra whitespace again + clean_text = ' '.join(clean_text.split()) + + return clean_text.strip() if clean_text.strip() else None + + except Exception: + # Fallback to original if cleaning fails + return notes_xml + def _get_rule_type_name(self, type_code): """Convert rule type code to readable name.""" type_names = { - 1: 'assignment', # SBML_ASSIGNMENT_RULE - 2: 'rate', # SBML_RATE_RULE - 3: 'algebraic' # SBML_ALGEBRAIC_RULE + 21: 'algebraic', # SBML_ALGEBRAIC_RULE + 22: 'assignment', # SBML_ASSIGNMENT_RULE + 23: 'rate' # SBML_RATE_RULE } return type_names.get(type_code, 'unknown') diff --git a/simba_ml/sbml_parser/level_3/parser.py b/simba_ml/sbml_parser/level_3/parser.py index c6c98b1..10a02eb 100644 --- a/simba_ml/sbml_parser/level_3/parser.py +++ b/simba_ml/sbml_parser/level_3/parser.py @@ -9,7 +9,7 @@ class Parser: Parser for SBML Level 3 models (versions 1 and 2). Enhanced parser supporting Level 3 features like conversionFactors and extensions. """ - + def __init__(self, file_path, level=3, version=None): self.file_path = file_path self.level = level @@ -20,7 +20,7 @@ def __init__(self, file_path, level=3, version=None): def parse(self): """ Parse SBML Level 3 file and extract ODE model components. 
- + Returns: dict: Parsed model data with species, reactions, parameters, compartments """ @@ -28,12 +28,12 @@ def parse(self): reader = SBMLReader() self.document = reader.readSBML(self.file_path) self.model = self.document.getModel() - + if self.model is None: raise SBMLParsingError("No model found in SBML file") - + logger.info(f"Parsing SBML Level 3 Version {self.version or 'unknown'} file: {self.file_path}") - + parsed_data = { 'sbml_info': self._get_sbml_info(), 'species': self._parse_species(), @@ -47,9 +47,9 @@ def parse(self): 'unit_definitions': self._parse_unit_definitions(), 'function_definitions': self._parse_function_definitions() } - + return parsed_data - + except Exception as e: if isinstance(e, SBMLParsingError): raise @@ -81,7 +81,7 @@ def _get_sbml_info(self): def _parse_species(self): """Parse species information with Level 3 enhancements.""" species_list = [] - + for i in range(self.model.getNumSpecies()): species = self.model.getSpecies(i) species_data = { @@ -100,16 +100,16 @@ def _parse_species(self): 'metaid': species.getMetaId() if species.isSetMetaId() else None } species_list.append(species_data) - + return species_list def _parse_reactions(self): """Parse reactions with Level 3 features.""" reactions_list = [] - + for i in range(self.model.getNumReactions()): reaction = self.model.getReaction(i) - + # Parse reactants reactants = [] for j in range(reaction.getNumReactants()): @@ -119,8 +119,8 @@ def _parse_reactions(self): 'stoichiometry': reactant.getStoichiometry(), 'constant': reactant.getConstant() }) - - # Parse products + + # Parse products products = [] for j in range(reaction.getNumProducts()): product = reaction.getProduct(j) @@ -129,7 +129,7 @@ def _parse_reactions(self): 'stoichiometry': product.getStoichiometry(), 'constant': product.getConstant() }) - + # Parse modifiers modifiers = [] for j in range(reaction.getNumModifiers()): @@ -137,7 +137,7 @@ def _parse_reactions(self): modifiers.append({ 'species': 
modifier.getSpecies() }) - + # Parse kinetic law kinetic_law = None if reaction.isSetKineticLaw(): @@ -147,7 +147,7 @@ def _parse_reactions(self): 'math': formulaToString(kl.getMath()) if kl.isSetMath() else None, 'parameters': self._parse_local_parameters(kl) } - + reaction_data = { 'id': reaction.getId(), 'name': reaction.getName() if reaction.isSetName() else reaction.getId(), @@ -162,13 +162,13 @@ def _parse_reactions(self): 'metaid': reaction.getMetaId() if reaction.isSetMetaId() else None } reactions_list.append(reaction_data) - + return reactions_list def _parse_parameters(self): """Parse global parameters.""" parameters_list = [] - + for i in range(self.model.getNumParameters()): param = self.model.getParameter(i) param_data = { @@ -182,13 +182,13 @@ def _parse_parameters(self): 'metaid': param.getMetaId() if param.isSetMetaId() else None } parameters_list.append(param_data) - + return parameters_list def _parse_compartments(self): """Parse compartment information with Level 3 features.""" compartments_list = [] - + for i in range(self.model.getNumCompartments()): comp = self.model.getCompartment(i) comp_data = { @@ -203,17 +203,17 @@ def _parse_compartments(self): 'metaid': comp.getMetaId() if comp.isSetMetaId() else None } compartments_list.append(comp_data) - + return compartments_list def _parse_rules(self): """Parse assignment, rate, and algebraic rules.""" rules_list = [] - + for i in range(self.model.getNumRules()): rule = self.model.getRule(i) rule_type = rule.getTypeCode() - + rule_data = { 'type': self._get_rule_type_name(rule_type), 'variable': rule.getVariable() if hasattr(rule, 'getVariable') else None, @@ -224,13 +224,13 @@ def _parse_rules(self): 'metaid': rule.getMetaId() if rule.isSetMetaId() else None } rules_list.append(rule_data) - + return rules_list def _parse_initial_assignments(self): """Parse initial assignments.""" assignments_list = [] - + for i in range(self.model.getNumInitialAssignments()): assignment = 
self.model.getInitialAssignment(i) assign_data = { @@ -242,17 +242,17 @@ def _parse_initial_assignments(self): 'metaid': assignment.getMetaId() if assignment.isSetMetaId() else None } assignments_list.append(assign_data) - + return assignments_list def _parse_events(self): """Parse events (Level 2 Version 2+, Level 3).""" events_list = [] - + if hasattr(self.model, 'getNumEvents'): for i in range(self.model.getNumEvents()): event = self.model.getEvent(i) - + # Parse trigger trigger_data = None if event.isSetTrigger(): @@ -263,7 +263,7 @@ def _parse_events(self): 'initial_value': trigger.getInitialValue() if hasattr(trigger, 'getInitialValue') else None, 'persistent': trigger.getPersistent() if hasattr(trigger, 'getPersistent') else None } - + # Parse delay delay_data = None if event.isSetDelay(): @@ -272,7 +272,7 @@ def _parse_events(self): 'formula': delay.getFormula() if delay.isSetFormula() else None, 'math': formulaToString(delay.getMath()) if delay.isSetMath() else None } - + # Parse event assignments assignments = [] for j in range(event.getNumEventAssignments()): @@ -282,7 +282,7 @@ def _parse_events(self): 'formula': ea.getFormula() if ea.isSetFormula() else None, 'math': formulaToString(ea.getMath()) if ea.isSetMath() else None }) - + event_data = { 'id': event.getId() if event.isSetId() else None, 'name': event.getName() if event.isSetName() else None, @@ -294,13 +294,13 @@ def _parse_events(self): 'sbo_term': event.getSBOTermID() if event.isSetSBOTerm() else None } events_list.append(event_data) - + return events_list def _parse_constraints(self): """Parse constraints (Level 2 Version 2+, Level 3).""" constraints_list = [] - + if hasattr(self.model, 'getNumConstraints'): for i in range(self.model.getNumConstraints()): constraint = self.model.getConstraint(i) @@ -312,16 +312,16 @@ def _parse_constraints(self): 'sbo_term': constraint.getSBOTermID() if constraint.isSetSBOTerm() else None } constraints_list.append(constraint_data) - + return constraints_list 
def _parse_unit_definitions(self): """Parse unit definitions.""" unit_defs = [] - + for i in range(self.model.getNumUnitDefinitions()): unit_def = self.model.getUnitDefinition(i) - + units = [] for j in range(unit_def.getNumUnits()): unit = unit_def.getUnit(j) @@ -331,7 +331,7 @@ def _parse_unit_definitions(self): 'scale': unit.getScale(), 'multiplier': unit.getMultiplier() }) - + unit_def_data = { 'id': unit_def.getId(), 'name': unit_def.getName() if unit_def.isSetName() else unit_def.getId(), @@ -340,13 +340,13 @@ def _parse_unit_definitions(self): 'sbo_term': unit_def.getSBOTermID() if unit_def.isSetSBOTerm() else None } unit_defs.append(unit_def_data) - + return unit_defs def _parse_function_definitions(self): """Parse function definitions.""" function_defs = [] - + for i in range(self.model.getNumFunctionDefinitions()): func_def = self.model.getFunctionDefinition(i) func_data = { @@ -358,13 +358,13 @@ def _parse_function_definitions(self): 'sbo_term': func_def.getSBOTermID() if func_def.isSetSBOTerm() else None } function_defs.append(func_data) - + return function_defs def _parse_local_parameters(self, kinetic_law): """Parse local parameters within kinetic laws.""" local_params = [] - + for i in range(kinetic_law.getNumLocalParameters()): param = kinetic_law.getLocalParameter(i) param_data = { @@ -376,20 +376,53 @@ def _parse_local_parameters(self, kinetic_law): 'sbo_term': param.getSBOTermID() if param.isSetSBOTerm() else None } local_params.append(param_data) - + return local_params def _get_notes(self, element): """Extract notes/annotations from SBML element.""" if element.isSetNotes(): - return element.getNotesString() + notes_xml = element.getNotesString() + return self._clean_notes_text(notes_xml) return None + def _clean_notes_text(self, notes_xml): + """Extract clean text from SBML notes XML.""" + if not notes_xml: + return None + + try: + import re + from html import unescape + + # Remove XML/HTML tags + clean_text = re.sub(r'<[^>]+>', ' ', notes_xml) 
+ + # Decode HTML entities + clean_text = unescape(clean_text) + + # Clean up whitespace + clean_text = ' '.join(clean_text.split()) + + # Remove common SBML boilerplate + clean_text = re.sub(r'This model is hosted on.*?BioModels Database.*?\.', '', clean_text, flags=re.DOTALL) + clean_text = re.sub(r'To cite BioModels Database.*?models\.', '', clean_text, flags=re.DOTALL) + clean_text = re.sub(r'To the extent possible under law.*?Dedication.*?\.', '', clean_text, flags=re.DOTALL) + + # Clean up extra whitespace again + clean_text = ' '.join(clean_text.split()) + + return clean_text.strip() if clean_text.strip() else None + + except Exception: + # Fallback to original if cleaning fails + return notes_xml + def _get_rule_type_name(self, type_code): """Convert rule type code to readable name.""" type_names = { - 1: 'assignment', # SBML_ASSIGNMENT_RULE - 2: 'rate', # SBML_RATE_RULE - 3: 'algebraic' # SBML_ALGEBRAIC_RULE + 21: 'algebraic', # SBML_ALGEBRAIC_RULE + 22: 'assignment', # SBML_ASSIGNMENT_RULE + 23: 'rate' # SBML_RATE_RULE } return type_names.get(type_code, 'unknown') diff --git a/simba_ml/sbml_parser/main_parser.py b/simba_ml/sbml_parser/main_parser.py index 65a5222..b2ce963 100644 --- a/simba_ml/sbml_parser/main_parser.py +++ b/simba_ml/sbml_parser/main_parser.py @@ -16,20 +16,20 @@ class UnsupportedSBMLVersionError(Exception): class MainSBMLParser: """ Main SBML parser that detects SBML level/version and routes to appropriate parser. 
- + Supports commonly used SBML versions for ODE models: - Level 2: Version 4, 5 - Level 3: Version 1, 2 """ - + # Define supported SBML level/version combinations SUPPORTED_VERSIONS = { (2, 4): "level_2.parser", - (2, 5): "level_2.parser", + (2, 5): "level_2.parser", (3, 1): "level_3.parser", (3, 2): "level_3.parser" } - + def __init__(self, file_path): self.file_path = file_path self.level = None @@ -39,10 +39,10 @@ def __init__(self, file_path): def detect_version_and_level(self): """ Parse SBML file to detect level and version. - + Returns: tuple: (level, version, model) from the SBML document - + Raises: SBMLParsingError: If file cannot be parsed or contains errors """ @@ -63,14 +63,14 @@ def detect_version_and_level(self): level = document.getLevel() version = document.getVersion() - + self.level = level self.version = version self.model = model - + logger.info(f"Detected SBML Level {level}, Version {version}") return level, version, model - + except Exception as e: if isinstance(e, (SBMLParsingError, UnsupportedSBMLVersionError)): raise @@ -79,37 +79,51 @@ def detect_version_and_level(self): def validate_ode_model(self, model): """ Validate that the SBML model represents an ODE system. 
- + Args: model: SBML model object - + Raises: SBMLParsingError: If model doesn't appear to be ODE-based """ - if model.getListOfReactions().size() == 0: - logger.warning("No reactions found - this may not be a dynamic ODE model") - - # Check for basic ODE model requirements + num_reactions = model.getListOfReactions().size() + num_rules = model.getListOfRules().size() + + # Check for rate rules (direct ODE specification) + has_rate_rules = False + if num_rules > 0: + for rule in model.getListOfRules(): + if rule.getTypeCode() == 23: # SBML_RATE_RULE + has_rate_rules = True + break + + # Check for reactions with kinetic laws has_kinetic_laws = False - for reaction in model.getListOfReactions(): - if reaction.getKineticLaw() is not None: - has_kinetic_laws = True - break - - if not has_kinetic_laws and model.getListOfReactions().size() > 0: - logger.warning("Reactions found but no kinetic laws - this may not be suitable for ODE simulation") + if num_reactions > 0: + for reaction in model.getListOfReactions(): + if reaction.getKineticLaw() is not None: + has_kinetic_laws = True + break + + # Determine if this is a valid ODE model + if num_reactions == 0 and not has_rate_rules: + logger.warning("No reactions or rate rules found - this may not be a dynamic ODE model") + elif num_reactions > 0 and not has_kinetic_laws and not has_rate_rules: + logger.warning("Reactions found but no kinetic laws or rate rules - this may not be suitable for ODE simulation") + elif has_rate_rules and num_reactions == 0: + logger.info(f"Rule-based ODE model detected with {num_rules} rules") def get_parser_module(self, level, version): """ Get the appropriate parser module for the given level/version. 
- + Args: level: SBML level version: SBML version - + Returns: str: Module path for the parser - + Raises: UnsupportedSBMLVersionError: If level/version combination is not supported """ @@ -119,28 +133,28 @@ def get_parser_module(self, level, version): f"SBML Level {level} Version {version} is not supported. " f"Supported versions: {', '.join(supported_versions)}" ) - + return self.SUPPORTED_VERSIONS[(level, version)] def process(self): """ Main processing method that detects version and delegates to appropriate parser. - + Returns: Parsed model data structure - + Raises: UnsupportedSBMLVersionError: If SBML version is not supported SBMLParsingError: If parsing fails """ level, version, model = self.detect_version_and_level() - + # Validate ODE model characteristics self.validate_ode_model(model) - + # Get and instantiate the appropriate parser parser_module_path = self.get_parser_module(level, version) - + try: if parser_module_path == "level_2.parser": from .level_2.parser import Parser as VersionParser @@ -148,16 +162,16 @@ def process(self): from .level_3.parser import Parser as VersionParser else: raise ImportError(f"Unknown parser module: {parser_module_path}") - + parser = VersionParser(self.file_path, level, version) parsed_data = parser.parse() - + # Add metadata with file path for units parsing if 'metadata' not in parsed_data: parsed_data['metadata'] = {} parsed_data['metadata']['sbml_file_path'] = self.file_path - + return parsed_data - + except ImportError as e: raise SBMLParsingError(f"Failed to import parser for Level {level} Version {version}: {str(e)}") diff --git a/simba_ml/sbml_parser/ml_exporter.py b/simba_ml/sbml_parser/ml_exporter.py index 85c96cd..1843e81 100644 --- a/simba_ml/sbml_parser/ml_exporter.py +++ b/simba_ml/sbml_parser/ml_exporter.py @@ -1,7 +1,7 @@ """ -ML Data Exporter for SBML models. +Data Exporter for SBML models. 
-Converts parsed SBML data into machine learning-ready formats including: +Converts parsed SBML data into numerical solver ready formats including: - Structured DataFrames for species, reactions, parameters - Network matrices (stoichiometry, adjacency) - Feature vectors for ML training @@ -16,13 +16,13 @@ from pathlib import Path -class SBMLMLExporter: - """Export SBML parsed data in machine learning-ready formats.""" - +class SBMLExporter: + """Export SBML parsed data in different formats.""" + def __init__(self, parsed_data: Dict[str, Any]): """ Initialize with parsed SBML data. - + Args: parsed_data: Output from MainSBMLParser.process() """ @@ -32,31 +32,31 @@ def __init__(self, parsed_data: Dict[str, Any]): self.reactions = parsed_data['reactions'] self.parameters = parsed_data['parameters'] self.compartments = parsed_data['compartments'] - + # Process species to separate dynamic from boundary self._process_species_types() self._parse_units_system() self._normalize_species_units() - + def _process_species_types(self): """Separate dynamic species from boundary/constant species.""" self.dynamic_species = [] self.boundary_species = [] - + for sp in self.species: is_boundary = sp.get('boundary_condition', False) is_constant = sp.get('constant', False) - + if is_boundary or is_constant: self.boundary_species.append(sp) else: self.dynamic_species.append(sp) - + def _parse_units_system(self): """Parse SBML units system using libSBML.""" # Initialize units info based on SBML Level level = self.sbml_info['level'] - + if level == 2: # Level 2: Use SBML specification defaults self.units_info = { @@ -84,7 +84,7 @@ def _parse_units_system(self): 'substance_multiplier': 1.0, 'time_multiplier': 1.0 } - + # We need to re-parse with libSBML to get units info # This is necessary because the main parser doesn't extract unit definitions if 'sbml_file_path' in self.data.get('metadata', {}): @@ -92,26 +92,26 @@ def _parse_units_system(self): else: # If file path not available, we'll 
work with defaults return - + try: reader = libsbml.SBMLReader() doc = reader.readSBML(file_path) model = doc.getModel() - + # Parse unit definitions self._extract_unit_definitions(model) - + except Exception as e: # If units parsing fails, use defaults pass - + def _extract_unit_definitions(self, model): """Extract unit definitions from libSBML model.""" # Check for custom unit definitions for i in range(model.getNumUnitDefinitions()): unit_def = model.getUnitDefinition(i) unit_id = unit_def.getId() - + if unit_id in ['substance', 'time', 'volume']: # Parse the unit definition if unit_def.getNumUnits() > 0: @@ -119,10 +119,10 @@ def _extract_unit_definitions(self, model): kind = libsbml.UnitKind_toString(unit.getKind()) scale = unit.getScale() multiplier = unit.getMultiplier() - + # Calculate actual multiplier: multiplier * 10^scale actual_multiplier = multiplier * (10 ** scale) - + if unit_id == 'substance': self.units_info['substance_unit'] = kind self.units_info['substance_multiplier'] = actual_multiplier @@ -131,10 +131,10 @@ def _extract_unit_definitions(self, model): self.units_info['time_multiplier'] = actual_multiplier elif unit_id == 'volume': self.units_info['volume_unit'] = kind - + # Set model-level units if specified level = self.sbml_info['level'] - + if model.isSetSubstanceUnits(): substance_unit_ref = model.getSubstanceUnits() # For Level 3, this should reference a unit definition @@ -143,7 +143,7 @@ def _extract_unit_definitions(self, model): pass else: self.units_info['substance_unit'] = substance_unit_ref - + if model.isSetTimeUnits(): time_unit_ref = model.getTimeUnits() # For Level 3, this should reference a unit definition @@ -152,7 +152,7 @@ def _extract_unit_definitions(self, model): pass else: self.units_info['time_unit'] = time_unit_ref - + if model.isSetVolumeUnits(): volume_unit_ref = model.getVolumeUnits() # For Level 3, this should reference a unit definition @@ -161,90 +161,100 @@ def _extract_unit_definitions(self, model): pass 
else: self.units_info['volume_unit'] = volume_unit_ref - + # For Level 3, validate that all required units are explicitly defined if level == 3: self._validate_level3_units() - + def _validate_level3_units(self): """Validate that Level 3 models have required units explicitly defined.""" import logging logger = logging.getLogger(__name__) - + missing_units = [] - + # Check if model actually needs these units needs_substance = self._model_uses_concentrations_or_amounts() - needs_time = self._model_has_kinetic_laws() + needs_time = self._model_is_ode_ready() needs_volume = self._model_uses_concentrations() - + if needs_substance and self.units_info['substance_unit'] is None: missing_units.append('substance') if needs_time and self.units_info['time_unit'] is None: - missing_units.append('time') + missing_units.append('time') if needs_volume and self.units_info['volume_unit'] is None: missing_units.append('volume') - + if missing_units: error_msg = (f"SBML Level 3 model missing required unit definitions: {', '.join(missing_units)}. 
" f"Level 3 specification requires all used units to be explicitly defined.") logger.error(error_msg) raise ValueError(error_msg) - + def _model_uses_concentrations_or_amounts(self) -> bool: """Check if model uses species amounts or concentrations.""" - return any(sp.get('initial_concentration') is not None or - sp.get('initial_amount') is not None + return any(sp.get('initial_concentration') is not None or + sp.get('initial_amount') is not None for sp in self.species) - + + def _model_is_ode_ready(self) -> bool: + """Check if model is ready for ODE simulation (has kinetic laws or rate rules).""" + # Check reactions with kinetic laws + has_kinetic_laws = any(rxn.get('kinetic_law') is not None for rxn in self.reactions) + + # Check for rate rules + has_rate_rules = any(rule.get('type') == 'rate' for rule in getattr(self, 'rules', [])) + + return has_kinetic_laws or has_rate_rules + def _model_has_kinetic_laws(self) -> bool: """Check if model has kinetic laws (needs time units).""" return any(rxn.get('kinetic_law') is not None for rxn in self.reactions) - + def _model_uses_concentrations(self) -> bool: """Check if model uses concentrations (needs volume units).""" return any(sp.get('initial_concentration') is not None for sp in self.species) - - + + def _get_compartment_size(self, compartment_id: str) -> float: """Get size of a compartment by ID.""" for comp in self.compartments: if comp['id'] == compartment_id: return comp.get('size', 1.0) return 1.0 # Default size if not found - + def _normalize_species_units(self): """Convert all species to concentration units for consistent ODE formulation.""" for sp in self.species: compartment_size = self._get_compartment_size(sp['compartment']) - + # Convert to concentration if needed if sp.get('initial_concentration') is not None: # Already in concentration units sp['normalized_concentration'] = sp['initial_concentration'] sp['units_type'] = 'concentration' - + elif sp.get('initial_amount') is not None: # Convert amount to 
concentration: [X] = amount / volume sp['normalized_concentration'] = sp['initial_amount'] / compartment_size sp['units_type'] = 'amount_converted' - + else: # No initial condition specified sp['normalized_concentration'] = 0.0 sp['units_type'] = 'default' - + # Add units information from parsed SBML sp['substance_unit'] = self.units_info['substance_unit'] sp['substance_multiplier'] = self.units_info['substance_multiplier'] sp['time_unit'] = self.units_info['time_unit'] sp['time_multiplier'] = self.units_info['time_multiplier'] sp['volume_unit'] = self.units_info['volume_unit'] - + def get_dynamic_species_concentrations(self) -> Tuple[np.ndarray, List[str]]: """ Get normalized initial concentrations for dynamic species only. - + Returns: tuple: (concentrations_array, species_ids) - concentrations_array: Initial concentrations for ODE system @@ -252,17 +262,17 @@ def get_dynamic_species_concentrations(self) -> Tuple[np.ndarray, List[str]]: """ concentrations = [] species_ids = [] - + for sp in self.dynamic_species: concentrations.append(sp['normalized_concentration']) species_ids.append(sp['id']) - + return np.array(concentrations), species_ids - + def get_boundary_species_info(self) -> List[Dict[str, Any]]: """ Get information about boundary/constant species. - + Returns: list: Information about boundary species that remain constant """ @@ -275,16 +285,16 @@ def get_boundary_species_info(self) -> List[Dict[str, Any]]: 'constant': sp.get('constant', False) }) return boundary_info - + def to_dataframes(self) -> Dict[str, pd.DataFrame]: """ Convert parsed data to pandas DataFrames. 
- + Returns: dict: DataFrames for different components """ dataframes = {} - + # Species DataFrame if self.species: species_data = [] @@ -308,7 +318,7 @@ def to_dataframes(self) -> Dict[str, pd.DataFrame]: 'is_dynamic': not (sp.get('boundary_condition', False) or sp.get('constant', False)) }) dataframes['species'] = pd.DataFrame(species_data) - + # Reactions DataFrame if self.reactions: reaction_data = [] @@ -324,7 +334,7 @@ def to_dataframes(self) -> Dict[str, pd.DataFrame]: 'num_products': len(rxn.get('products', [])), 'num_modifiers': len(rxn.get('modifiers', [])) } - + # Add kinetic law info if available if rxn.get('kinetic_law'): kl = rxn['kinetic_law'] @@ -333,10 +343,10 @@ def to_dataframes(self) -> Dict[str, pd.DataFrame]: 'kinetic_math': kl.get('math'), 'num_local_parameters': len(kl.get('parameters', [])) }) - + reaction_data.append(rxn_row) dataframes['reactions'] = pd.DataFrame(reaction_data) - + # Parameters DataFrame if self.parameters: param_data = [] @@ -349,7 +359,7 @@ def to_dataframes(self) -> Dict[str, pd.DataFrame]: 'constant': param.get('constant', True) }) dataframes['parameters'] = pd.DataFrame(param_data) - + # Compartments DataFrame if self.compartments: comp_data = [] @@ -362,20 +372,20 @@ def to_dataframes(self) -> Dict[str, pd.DataFrame]: 'constant': comp.get('constant', True) }) dataframes['compartments'] = pd.DataFrame(comp_data) - + return dataframes - + def get_stoichiometry_matrix(self, dynamic_only: bool = True) -> Tuple[np.ndarray, List[str], List[str]]: """ Create stoichiometry matrix for the reaction network. 
- + Args: dynamic_only: If True, only include non-boundary, non-constant species - + Returns: tuple: (matrix, species_ids, reaction_ids) - matrix: shape (n_dynamic_species, n_reactions) or (n_species, n_reactions) - - species_ids: list of species identifiers + - species_ids: list of species identifiers - reaction_ids: list of reaction identifiers """ if dynamic_only: @@ -384,15 +394,15 @@ def get_stoichiometry_matrix(self, dynamic_only: bool = True) -> Tuple[np.ndarra else: species_list = self.species species_ids = [sp['id'] for sp in species_list] - + reaction_ids = [rxn['id'] for rxn in self.reactions] - + # Create species index mapping species_idx = {sp_id: i for i, sp_id in enumerate(species_ids)} - + # Initialize stoichiometry matrix S = np.zeros((len(species_ids), len(reaction_ids))) - + for j, reaction in enumerate(self.reactions): # Add reactants (negative stoichiometry) for reactant in reaction.get('reactants', []): @@ -401,7 +411,7 @@ def get_stoichiometry_matrix(self, dynamic_only: bool = True) -> Tuple[np.ndarra i = species_idx[sp_id] stoich = reactant.get('stoichiometry', 1.0) S[i, j] -= stoich - + # Add products (positive stoichiometry) for product in reaction.get('products', []): sp_id = product['species'] @@ -409,16 +419,16 @@ def get_stoichiometry_matrix(self, dynamic_only: bool = True) -> Tuple[np.ndarra i = species_idx[sp_id] stoich = product.get('stoichiometry', 1.0) S[i, j] += stoich - + return S, species_ids, reaction_ids - + def get_adjacency_matrix(self, include_modifiers: bool = True) -> Tuple[np.ndarray, List[str]]: """ Create adjacency matrix representing species-species interactions. 
- + Args: include_modifiers: Whether to include modifier relationships - + Returns: tuple: (adjacency_matrix, species_ids) - adjacency_matrix: shape (n_species, n_species) @@ -426,22 +436,22 @@ def get_adjacency_matrix(self, include_modifiers: bool = True) -> Tuple[np.ndarr """ species_ids = [sp['id'] for sp in self.species] species_idx = {sp_id: i for i, sp_id in enumerate(species_ids)} - + # Initialize adjacency matrix A = np.zeros((len(species_ids), len(species_ids))) - + for reaction in self.reactions: reactant_ids = [r['species'] for r in reaction.get('reactants', [])] product_ids = [p['species'] for p in reaction.get('products', [])] modifier_ids = [m['species'] for m in reaction.get('modifiers', [])] if include_modifiers else [] - + # Reactants to products for reactant_id in reactant_ids: for product_id in product_ids: if reactant_id in species_idx and product_id in species_idx: i, j = species_idx[reactant_id], species_idx[product_id] A[i, j] = 1 - + # Modifiers to products (regulatory interactions) if include_modifiers: for modifier_id in modifier_ids: @@ -449,18 +459,18 @@ def get_adjacency_matrix(self, include_modifiers: bool = True) -> Tuple[np.ndarr if modifier_id in species_idx and product_id in species_idx: i, j = species_idx[modifier_id], species_idx[product_id] A[i, j] = 1 - + return A, species_ids - + def get_feature_vectors(self) -> Dict[str, np.ndarray]: """ Extract feature vectors for ML training. 
- + Returns: dict: Feature vectors for different components """ features = {} - + # Species features if self.species: species_features = [] @@ -473,7 +483,7 @@ def get_feature_vectors(self) -> Dict[str, np.ndarray]: ] species_features.append(feat) features['species'] = np.array(species_features) - + # Reaction features if self.reactions: reaction_features = [] @@ -488,12 +498,12 @@ def get_feature_vectors(self) -> Dict[str, np.ndarray]: ] reaction_features.append(feat) features['reactions'] = np.array(reaction_features) - + # Network topology features if self.species and self.reactions: S, _, _ = self.get_stoichiometry_matrix() A, _ = self.get_adjacency_matrix() - + # Network-level features network_features = [ len(self.species), # Number of species @@ -504,13 +514,13 @@ def get_feature_vectors(self) -> Dict[str, np.ndarray]: np.mean(np.sum(A, axis=1)), # Average adjacency degree ] features['network'] = np.array(network_features) - + return features - + def get_ml_dataset(self) -> Dict[str, Any]: """ - Get comprehensive ML-ready dataset. - + Get comprehensive dataset. 
+ Returns: dict: Complete dataset with matrices, features, and metadata """ @@ -523,47 +533,49 @@ def get_ml_dataset(self) -> Dict[str, Any]: 'num_species': len(self.species), 'num_reactions': len(self.reactions), 'num_parameters': len(self.parameters), - 'has_kinetic_laws': any(r.get('kinetic_law') for r in self.reactions) + 'has_kinetic_laws': any(r.get('kinetic_law') for r in self.reactions), + 'has_rate_rules': any(rule.get('type') == 'rate' for rule in getattr(self, 'rules', [])), + 'ode_ready': self._model_is_ode_ready() } } - + # Add matrices if self.species and self.reactions: S, species_ids, reaction_ids = self.get_stoichiometry_matrix() A, _ = self.get_adjacency_matrix() - + dataset['matrices'] = { 'stoichiometry': S, 'adjacency': A, 'species_ids': species_ids, 'reaction_ids': reaction_ids } - + # Add feature vectors dataset['features'] = self.get_feature_vectors() - + # Add DataFrames dataset['dataframes'] = self.to_dataframes() - + return dataset - + def export_to_files(self, output_dir: str, format: str = 'csv') -> Dict[str, str]: """ Export data to files for ML workflows. 
- + Args: output_dir: Directory to save files format: Export format ('csv', 'json', 'npz', 'pickle') - + Returns: dict: Mapping of data type to file path """ output_path = Path(output_dir) output_path.mkdir(parents=True, exist_ok=True) - + model_name = self.sbml_info.get('model_id', 'sbml_model') exported_files = {} - + if format == 'csv': # Export DataFrames as CSV dataframes = self.to_dataframes() @@ -571,24 +583,24 @@ def export_to_files(self, output_dir: str, format: str = 'csv') -> Dict[str, str file_path = output_path / f"{model_name}_{name}.csv" df.to_csv(file_path, index=False) exported_files[name] = str(file_path) - + # Export matrices as CSV if self.species and self.reactions: S, species_ids, reaction_ids = self.get_stoichiometry_matrix() A, _ = self.get_adjacency_matrix() - + # Stoichiometry matrix with labels S_df = pd.DataFrame(S, index=species_ids, columns=reaction_ids) S_file = output_path / f"{model_name}_stoichiometry.csv" S_df.to_csv(S_file) exported_files['stoichiometry'] = str(S_file) - + # Adjacency matrix with labels A_df = pd.DataFrame(A, index=species_ids, columns=species_ids) A_file = output_path / f"{model_name}_adjacency.csv" A_df.to_csv(A_file) exported_files['adjacency'] = str(A_file) - + elif format == 'json': # Export as JSON ml_dataset = self.get_ml_dataset() @@ -599,7 +611,7 @@ def convert_numpy(obj): elif isinstance(obj, pd.DataFrame): return obj.to_dict('records') return obj - + json_data = {} for key, value in ml_dataset.items(): if key == 'dataframes': @@ -610,28 +622,28 @@ def convert_numpy(obj): json_data[key] = {k: convert_numpy(v) for k, v in value.items()} else: json_data[key] = value - + json_file = output_path / f"{model_name}_ml_data.json" with open(json_file, 'w') as f: json.dump(json_data, f, indent=2) exported_files['ml_data'] = str(json_file) - + elif format == 'npz': # Export as NumPy compressed format ml_dataset = self.get_ml_dataset() arrays_to_save = {} - + # Flatten all numpy arrays with descriptive names if 
'matrices' in ml_dataset: arrays_to_save['stoichiometry_matrix'] = ml_dataset['matrices']['stoichiometry'] arrays_to_save['adjacency_matrix'] = ml_dataset['matrices']['adjacency'] - + if 'features' in ml_dataset: for feat_name, feat_array in ml_dataset['features'].items(): arrays_to_save[f'{feat_name}_features'] = feat_array - + npz_file = output_path / f"{model_name}_ml_data.npz" np.savez_compressed(npz_file, **arrays_to_save) exported_files['ml_data'] = str(npz_file) - + return exported_files diff --git a/simba_ml/simulation/system_model/__init__.py b/simba_ml/simulation/system_model/__init__.py index 2d4ab32..afd9b1c 100644 --- a/simba_ml/simulation/system_model/__init__.py +++ b/simba_ml/simulation/system_model/__init__.py @@ -2,5 +2,6 @@ # pylint: disable=only-importing-modules-is-allowed from simba_ml.simulation.system_model.system_model import SystemModel +from simba_ml.simulation.system_model.sbml_system_model import SBMLSystemModel -__all__ = ["SystemModel"] +__all__ = ["SystemModel", "SBMLSystemModel"] diff --git a/tests/prediction/steady_state/conf/steady_state_pipeline_test_conf.toml b/tests/prediction/steady_state/conf/steady_state_pipeline_test_conf.toml index 6188b19..a609d56 100644 --- a/tests/prediction/steady_state/conf/steady_state_pipeline_test_conf.toml +++ b/tests/prediction/steady_state/conf/steady_state_pipeline_test_conf.toml @@ -15,4 +15,4 @@ prediction_params = ["B"] real = "/tests/prediction/time_series/test_data/real/" simulated = "/tests/prediction/time_series/test_data/simulated/" ratios = [1.0, 0.5] -test_split = 0.2 \ No newline at end of file +test_split = 0.2 diff --git a/tests/prediction/steady_state/test_data/real/SteadyState_0.csv b/tests/prediction/steady_state/test_data/real/SteadyState_0.csv index 17a0a1d..ef0886e 100644 --- a/tests/prediction/steady_state/test_data/real/SteadyState_0.csv +++ b/tests/prediction/steady_state/test_data/real/SteadyState_0.csv @@ -23,4 +23,4 @@ A,B 100.0,500.0 100.0,500.0 100.0,500.0 
-100.0,500.0 \ No newline at end of file +100.0,500.0 diff --git a/tests/prediction/steady_state/test_data/real/SteadyState_1.csv b/tests/prediction/steady_state/test_data/real/SteadyState_1.csv index 17a0a1d..ef0886e 100644 --- a/tests/prediction/steady_state/test_data/real/SteadyState_1.csv +++ b/tests/prediction/steady_state/test_data/real/SteadyState_1.csv @@ -23,4 +23,4 @@ A,B 100.0,500.0 100.0,500.0 100.0,500.0 -100.0,500.0 \ No newline at end of file +100.0,500.0 diff --git a/tests/prediction/steady_state/test_data/real/SteadyState_2.csv b/tests/prediction/steady_state/test_data/real/SteadyState_2.csv index 17a0a1d..ef0886e 100644 --- a/tests/prediction/steady_state/test_data/real/SteadyState_2.csv +++ b/tests/prediction/steady_state/test_data/real/SteadyState_2.csv @@ -23,4 +23,4 @@ A,B 100.0,500.0 100.0,500.0 100.0,500.0 -100.0,500.0 \ No newline at end of file +100.0,500.0 diff --git a/tests/prediction/steady_state/test_data/real/SteadyState_3.csv b/tests/prediction/steady_state/test_data/real/SteadyState_3.csv index 17a0a1d..ef0886e 100644 --- a/tests/prediction/steady_state/test_data/real/SteadyState_3.csv +++ b/tests/prediction/steady_state/test_data/real/SteadyState_3.csv @@ -23,4 +23,4 @@ A,B 100.0,500.0 100.0,500.0 100.0,500.0 -100.0,500.0 \ No newline at end of file +100.0,500.0 diff --git a/tests/prediction/steady_state/test_data/real/SteadyState_4.csv b/tests/prediction/steady_state/test_data/real/SteadyState_4.csv index 17a0a1d..ef0886e 100644 --- a/tests/prediction/steady_state/test_data/real/SteadyState_4.csv +++ b/tests/prediction/steady_state/test_data/real/SteadyState_4.csv @@ -23,4 +23,4 @@ A,B 100.0,500.0 100.0,500.0 100.0,500.0 -100.0,500.0 \ No newline at end of file +100.0,500.0 diff --git a/tests/prediction/steady_state/test_data/simulated/SteadyState_0.csv b/tests/prediction/steady_state/test_data/simulated/SteadyState_0.csv index 4736441..373e848 100644 --- a/tests/prediction/steady_state/test_data/simulated/SteadyState_0.csv +++ 
b/tests/prediction/steady_state/test_data/simulated/SteadyState_0.csv @@ -23,4 +23,4 @@ A,B 200.0, 600.0 200.0, 600.0 200.0, 600.0 -200.0, 600.0 \ No newline at end of file +200.0, 600.0 diff --git a/tests/prediction/steady_state/test_data/simulated/SteadyState_1.csv b/tests/prediction/steady_state/test_data/simulated/SteadyState_1.csv index 4736441..373e848 100644 --- a/tests/prediction/steady_state/test_data/simulated/SteadyState_1.csv +++ b/tests/prediction/steady_state/test_data/simulated/SteadyState_1.csv @@ -23,4 +23,4 @@ A,B 200.0, 600.0 200.0, 600.0 200.0, 600.0 -200.0, 600.0 \ No newline at end of file +200.0, 600.0 diff --git a/tests/prediction/steady_state/test_data/simulated/SteadyState_2.csv b/tests/prediction/steady_state/test_data/simulated/SteadyState_2.csv index 4736441..373e848 100644 --- a/tests/prediction/steady_state/test_data/simulated/SteadyState_2.csv +++ b/tests/prediction/steady_state/test_data/simulated/SteadyState_2.csv @@ -23,4 +23,4 @@ A,B 200.0, 600.0 200.0, 600.0 200.0, 600.0 -200.0, 600.0 \ No newline at end of file +200.0, 600.0 diff --git a/tests/prediction/steady_state/test_data/simulated/SteadyState_3.csv b/tests/prediction/steady_state/test_data/simulated/SteadyState_3.csv index 4736441..373e848 100644 --- a/tests/prediction/steady_state/test_data/simulated/SteadyState_3.csv +++ b/tests/prediction/steady_state/test_data/simulated/SteadyState_3.csv @@ -23,4 +23,4 @@ A,B 200.0, 600.0 200.0, 600.0 200.0, 600.0 -200.0, 600.0 \ No newline at end of file +200.0, 600.0 diff --git a/tests/prediction/steady_state/test_data/simulated/SteadyState_4.csv b/tests/prediction/steady_state/test_data/simulated/SteadyState_4.csv index 4736441..373e848 100644 --- a/tests/prediction/steady_state/test_data/simulated/SteadyState_4.csv +++ b/tests/prediction/steady_state/test_data/simulated/SteadyState_4.csv @@ -23,4 +23,4 @@ A,B 200.0, 600.0 200.0, 600.0 200.0, 600.0 -200.0, 600.0 \ No newline at end of file +200.0, 600.0 diff --git 
a/tests/prediction/time_series/conf/mixed_data_pipeline_export.toml b/tests/prediction/time_series/conf/mixed_data_pipeline_export.toml index 4f8e1ea..88596a3 100644 --- a/tests/prediction/time_series/conf/mixed_data_pipeline_export.toml +++ b/tests/prediction/time_series/conf/mixed_data_pipeline_export.toml @@ -31,4 +31,4 @@ export_path = "tests/prediction/time_series/test_data/export" input_length = 1 output_length = 1 input_features = ["Infected", "Recovered"] -output_features = ["Infected", "Recovered"] \ No newline at end of file +output_features = ["Infected", "Recovered"] diff --git a/tests/prediction/time_series/conf/pipeline_test_conf_plugins_normalize.toml b/tests/prediction/time_series/conf/pipeline_test_conf_plugins_normalize.toml index 37a24b5..c833279 100644 --- a/tests/prediction/time_series/conf/pipeline_test_conf_plugins_normalize.toml +++ b/tests/prediction/time_series/conf/pipeline_test_conf_plugins_normalize.toml @@ -34,5 +34,3 @@ input_length = 1 output_length = 1 input_features = ["Infected", "Recovered"] output_features = ["Infected", "Recovered"] - - diff --git a/tests/prediction/time_series/conf/synthetic_data_pipeline_export.toml b/tests/prediction/time_series/conf/synthetic_data_pipeline_export.toml index af703c8..02695b7 100644 --- a/tests/prediction/time_series/conf/synthetic_data_pipeline_export.toml +++ b/tests/prediction/time_series/conf/synthetic_data_pipeline_export.toml @@ -29,4 +29,4 @@ export_path = "tests/prediction/time_series/test_data/export" input_length = 1 output_length = 1 input_features = ["Infected", "Recovered"] -output_features = ["Infected", "Recovered"] \ No newline at end of file +output_features = ["Infected", "Recovered"] diff --git a/tests/prediction/time_series/conf/synthetic_data_pipeline_test_conf.toml b/tests/prediction/time_series/conf/synthetic_data_pipeline_test_conf.toml index 6fb81bc..107a111 100644 --- a/tests/prediction/time_series/conf/synthetic_data_pipeline_test_conf.toml +++ 
b/tests/prediction/time_series/conf/synthetic_data_pipeline_test_conf.toml @@ -44,4 +44,4 @@ test_split = 0.2 input_features = ["Infected", "Recovered"] output_features = ["Infected", "Recovered"] input_length = 1 -output_length = 1 \ No newline at end of file +output_length = 1 diff --git a/tests/prediction/time_series/conf/synthetic_data_pipeline_test_conf_3_species.toml b/tests/prediction/time_series/conf/synthetic_data_pipeline_test_conf_3_species.toml index b5b8187..9ed6d4d 100644 --- a/tests/prediction/time_series/conf/synthetic_data_pipeline_test_conf_3_species.toml +++ b/tests/prediction/time_series/conf/synthetic_data_pipeline_test_conf_3_species.toml @@ -45,5 +45,3 @@ input_features = ["8", "10", "21"] output_features = ["8", "10", "21"] input_length = 2 output_length = 2 - - diff --git a/tests/prediction/time_series/conf/time_series_pipeline_test_conf.toml b/tests/prediction/time_series/conf/time_series_pipeline_test_conf.toml index f799aec..4f1d9a1 100644 --- a/tests/prediction/time_series/conf/time_series_pipeline_test_conf.toml +++ b/tests/prediction/time_series/conf/time_series_pipeline_test_conf.toml @@ -31,4 +31,4 @@ test_split = 0.2 input_length = 1 output_length = 1 input_features = ["Infected", "Recovered"] -output_features = ["Infected", "Recovered"] \ No newline at end of file +output_features = ["Infected", "Recovered"] diff --git a/tests/prediction/time_series/conf/transfer_learning_pipeline.toml b/tests/prediction/time_series/conf/transfer_learning_pipeline.toml index 4453560..e98f325 100644 --- a/tests/prediction/time_series/conf/transfer_learning_pipeline.toml +++ b/tests/prediction/time_series/conf/transfer_learning_pipeline.toml @@ -31,4 +31,4 @@ test_split = 0.2 input_length = 1 output_length = 1 input_features = ["Infected", "Recovered"] -output_features = ["Infected", "Recovered"] \ No newline at end of file +output_features = ["Infected", "Recovered"] diff --git 
a/tests/prediction/time_series/conf/transfer_learning_pipeline_3_spec.toml b/tests/prediction/time_series/conf/transfer_learning_pipeline_3_spec.toml index c10eb25..71a22ce 100644 --- a/tests/prediction/time_series/conf/transfer_learning_pipeline_3_spec.toml +++ b/tests/prediction/time_series/conf/transfer_learning_pipeline_3_spec.toml @@ -31,4 +31,4 @@ test_split = 0.2 input_length = 1 output_length = 1 input_features = ["8", "10", "21"] -output_features = ["8", "10", "21"] \ No newline at end of file +output_features = ["8", "10", "21"] diff --git a/tests/prediction/time_series/conf/transfer_learning_pipeline_export.toml b/tests/prediction/time_series/conf/transfer_learning_pipeline_export.toml index 0f938db..0626a82 100644 --- a/tests/prediction/time_series/conf/transfer_learning_pipeline_export.toml +++ b/tests/prediction/time_series/conf/transfer_learning_pipeline_export.toml @@ -32,4 +32,4 @@ export_path = "tests/prediction/time_series/test_data/export" input_length = 1 output_length = 1 input_features = ["Infected", "Recovered"] -output_features = ["Infected", "Recovered"] \ No newline at end of file +output_features = ["Infected", "Recovered"] diff --git a/tests/prediction/time_series/conf/transfer_learning_pipeline_test.toml b/tests/prediction/time_series/conf/transfer_learning_pipeline_test.toml index 2e949dd..23506d4 100644 --- a/tests/prediction/time_series/conf/transfer_learning_pipeline_test.toml +++ b/tests/prediction/time_series/conf/transfer_learning_pipeline_test.toml @@ -26,4 +26,4 @@ test_split = 0.2 input_length = 1 output_length = 1 input_features = ["Infected", "Recovered"] -output_features = ["Infected", "Recovered"] \ No newline at end of file +output_features = ["Infected", "Recovered"] diff --git a/tests/prediction/time_series/test_data/num_species_1/real/.gitignore b/tests/prediction/time_series/test_data/num_species_1/real/.gitignore index 3a40598..acd6131 100644 --- a/tests/prediction/time_series/test_data/num_species_1/real/.gitignore 
+++ b/tests/prediction/time_series/test_data/num_species_1/real/.gitignore @@ -1 +1 @@ -!.csv \ No newline at end of file +!.csv diff --git a/tests/prediction/time_series/test_data/num_species_1/real/SIR_0.csv b/tests/prediction/time_series/test_data/num_species_1/real/SIR_0.csv index 0bf57c0..865a7a0 100644 --- a/tests/prediction/time_series/test_data/num_species_1/real/SIR_0.csv +++ b/tests/prediction/time_series/test_data/num_species_1/real/SIR_0.csv @@ -48,4 +48,4 @@ Infected,Recovered 400.0,400.0 400.0,400.0 400.0,400.0 -400.0,400.0 \ No newline at end of file +400.0,400.0 diff --git a/tests/prediction/time_series/test_data/num_species_1/real/SIR_1.csv b/tests/prediction/time_series/test_data/num_species_1/real/SIR_1.csv index 0bf57c0..865a7a0 100644 --- a/tests/prediction/time_series/test_data/num_species_1/real/SIR_1.csv +++ b/tests/prediction/time_series/test_data/num_species_1/real/SIR_1.csv @@ -48,4 +48,4 @@ Infected,Recovered 400.0,400.0 400.0,400.0 400.0,400.0 -400.0,400.0 \ No newline at end of file +400.0,400.0 diff --git a/tests/prediction/time_series/test_data/num_species_1/real/SIR_2.csv b/tests/prediction/time_series/test_data/num_species_1/real/SIR_2.csv index 0bf57c0..865a7a0 100644 --- a/tests/prediction/time_series/test_data/num_species_1/real/SIR_2.csv +++ b/tests/prediction/time_series/test_data/num_species_1/real/SIR_2.csv @@ -48,4 +48,4 @@ Infected,Recovered 400.0,400.0 400.0,400.0 400.0,400.0 -400.0,400.0 \ No newline at end of file +400.0,400.0 diff --git a/tests/prediction/time_series/test_data/num_species_1/real/SIR_3.csv b/tests/prediction/time_series/test_data/num_species_1/real/SIR_3.csv index 0bf57c0..865a7a0 100644 --- a/tests/prediction/time_series/test_data/num_species_1/real/SIR_3.csv +++ b/tests/prediction/time_series/test_data/num_species_1/real/SIR_3.csv @@ -48,4 +48,4 @@ Infected,Recovered 400.0,400.0 400.0,400.0 400.0,400.0 -400.0,400.0 \ No newline at end of file +400.0,400.0 diff --git 
a/tests/prediction/time_series/test_data/num_species_1/real/SIR_4.csv b/tests/prediction/time_series/test_data/num_species_1/real/SIR_4.csv index 0bf57c0..865a7a0 100644 --- a/tests/prediction/time_series/test_data/num_species_1/real/SIR_4.csv +++ b/tests/prediction/time_series/test_data/num_species_1/real/SIR_4.csv @@ -48,4 +48,4 @@ Infected,Recovered 400.0,400.0 400.0,400.0 400.0,400.0 -400.0,400.0 \ No newline at end of file +400.0,400.0 diff --git a/tests/prediction/time_series/test_data/num_species_1/simulated/.gitignore b/tests/prediction/time_series/test_data/num_species_1/simulated/.gitignore index 3a40598..acd6131 100644 --- a/tests/prediction/time_series/test_data/num_species_1/simulated/.gitignore +++ b/tests/prediction/time_series/test_data/num_species_1/simulated/.gitignore @@ -1 +1 @@ -!.csv \ No newline at end of file +!.csv diff --git a/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_0.csv b/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_0.csv index 1f8fe27..06cbe6c 100644 --- a/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_0.csv +++ b/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_0.csv @@ -48,4 +48,4 @@ Infected,Recovered 700.0,700.0 800.0,800.0 900.0,900.0 -1000.0,1000.0 \ No newline at end of file +1000.0,1000.0 diff --git a/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_1.csv b/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_1.csv index 1f8fe27..06cbe6c 100644 --- a/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_1.csv +++ b/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_1.csv @@ -48,4 +48,4 @@ Infected,Recovered 700.0,700.0 800.0,800.0 900.0,900.0 -1000.0,1000.0 \ No newline at end of file +1000.0,1000.0 diff --git a/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_2.csv b/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_2.csv index 1f8fe27..06cbe6c 
100644 --- a/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_2.csv +++ b/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_2.csv @@ -48,4 +48,4 @@ Infected,Recovered 700.0,700.0 800.0,800.0 900.0,900.0 -1000.0,1000.0 \ No newline at end of file +1000.0,1000.0 diff --git a/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_3.csv b/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_3.csv index 1f8fe27..06cbe6c 100644 --- a/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_3.csv +++ b/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_3.csv @@ -48,4 +48,4 @@ Infected,Recovered 700.0,700.0 800.0,800.0 900.0,900.0 -1000.0,1000.0 \ No newline at end of file +1000.0,1000.0 diff --git a/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_4.csv b/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_4.csv index 1f8fe27..06cbe6c 100644 --- a/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_4.csv +++ b/tests/prediction/time_series/test_data/num_species_1/simulated/SIR_4.csv @@ -48,4 +48,4 @@ Infected,Recovered 700.0,700.0 800.0,800.0 900.0,900.0 -1000.0,1000.0 \ No newline at end of file +1000.0,1000.0 diff --git a/tests/prediction/time_series/test_data_plugins/real/.gitignore b/tests/prediction/time_series/test_data_plugins/real/.gitignore index 3a40598..acd6131 100644 --- a/tests/prediction/time_series/test_data_plugins/real/.gitignore +++ b/tests/prediction/time_series/test_data_plugins/real/.gitignore @@ -1 +1 @@ -!.csv \ No newline at end of file +!.csv diff --git a/tests/prediction/time_series/test_data_plugins/real/SIR_0.csv b/tests/prediction/time_series/test_data_plugins/real/SIR_0.csv index 0bf57c0..38f11c5 100644 --- a/tests/prediction/time_series/test_data_plugins/real/SIR_0.csv +++ b/tests/prediction/time_series/test_data_plugins/real/SIR_0.csv @@ -1,51 +1,50 @@ Infected,Recovered -500.0,500.0 -500.0,500.0 -500.0,500.0 
-500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 \ No newline at end of file +495.0,5.0 +510.0,12.0 +525.0,25.0 +550.0,45.0 +580.0,75.0 +620.0,110.0 +600.0,150.0 +575.0,190.0 +540.0,230.0 +510.0,270.0 +480.0,310.0 +450.0,340.0 +420.0,365.0 +400.0,385.0 +380.0,400.0 +360.0,410.0 +340.0,415.0 +325.0,420.0 +310.0,425.0 +300.0,430.0 +290.0,435.0 +285.0,440.0 +280.0,445.0 +275.0,450.0 +270.0,455.0 +265.0,460.0 +260.0,465.0 +255.0,470.0 +250.0,475.0 +245.0,480.0 +240.0,485.0 +235.0,490.0 +230.0,495.0 +225.0,500.0 +220.0,500.0 +215.0,500.0 +210.0,500.0 +205.0,500.0 +200.0,500.0 +195.0,500.0 +190.0,500.0 +185.0,500.0 +180.0,500.0 +175.0,500.0 +170.0,500.0 +165.0,500.0 +160.0,500.0 +155.0,500.0 +150.0,500.0 diff --git a/tests/prediction/time_series/test_data_plugins/real/SIR_1.csv b/tests/prediction/time_series/test_data_plugins/real/SIR_1.csv index 0bf57c0..38f11c5 100644 --- a/tests/prediction/time_series/test_data_plugins/real/SIR_1.csv +++ b/tests/prediction/time_series/test_data_plugins/real/SIR_1.csv @@ -1,51 +1,50 @@ Infected,Recovered -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 
-500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 \ No newline at end of file +495.0,5.0 +510.0,12.0 +525.0,25.0 +550.0,45.0 +580.0,75.0 +620.0,110.0 +600.0,150.0 +575.0,190.0 +540.0,230.0 +510.0,270.0 +480.0,310.0 +450.0,340.0 +420.0,365.0 +400.0,385.0 +380.0,400.0 +360.0,410.0 +340.0,415.0 +325.0,420.0 +310.0,425.0 +300.0,430.0 +290.0,435.0 +285.0,440.0 +280.0,445.0 +275.0,450.0 +270.0,455.0 +265.0,460.0 +260.0,465.0 +255.0,470.0 +250.0,475.0 +245.0,480.0 +240.0,485.0 +235.0,490.0 +230.0,495.0 +225.0,500.0 +220.0,500.0 +215.0,500.0 +210.0,500.0 +205.0,500.0 +200.0,500.0 +195.0,500.0 +190.0,500.0 +185.0,500.0 +180.0,500.0 +175.0,500.0 +170.0,500.0 +165.0,500.0 +160.0,500.0 +155.0,500.0 +150.0,500.0 diff --git a/tests/prediction/time_series/test_data_plugins/real/SIR_2.csv b/tests/prediction/time_series/test_data_plugins/real/SIR_2.csv index 0bf57c0..38f11c5 100644 --- a/tests/prediction/time_series/test_data_plugins/real/SIR_2.csv +++ b/tests/prediction/time_series/test_data_plugins/real/SIR_2.csv @@ -1,51 +1,50 @@ Infected,Recovered -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 \ No newline at end of file +495.0,5.0 +510.0,12.0 +525.0,25.0 +550.0,45.0 
+580.0,75.0 +620.0,110.0 +600.0,150.0 +575.0,190.0 +540.0,230.0 +510.0,270.0 +480.0,310.0 +450.0,340.0 +420.0,365.0 +400.0,385.0 +380.0,400.0 +360.0,410.0 +340.0,415.0 +325.0,420.0 +310.0,425.0 +300.0,430.0 +290.0,435.0 +285.0,440.0 +280.0,445.0 +275.0,450.0 +270.0,455.0 +265.0,460.0 +260.0,465.0 +255.0,470.0 +250.0,475.0 +245.0,480.0 +240.0,485.0 +235.0,490.0 +230.0,495.0 +225.0,500.0 +220.0,500.0 +215.0,500.0 +210.0,500.0 +205.0,500.0 +200.0,500.0 +195.0,500.0 +190.0,500.0 +185.0,500.0 +180.0,500.0 +175.0,500.0 +170.0,500.0 +165.0,500.0 +160.0,500.0 +155.0,500.0 +150.0,500.0 diff --git a/tests/prediction/time_series/test_data_plugins/real/SIR_3.csv b/tests/prediction/time_series/test_data_plugins/real/SIR_3.csv index 0bf57c0..38f11c5 100644 --- a/tests/prediction/time_series/test_data_plugins/real/SIR_3.csv +++ b/tests/prediction/time_series/test_data_plugins/real/SIR_3.csv @@ -1,51 +1,50 @@ Infected,Recovered -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 \ No newline at end of file +495.0,5.0 +510.0,12.0 +525.0,25.0 +550.0,45.0 +580.0,75.0 +620.0,110.0 +600.0,150.0 +575.0,190.0 +540.0,230.0 +510.0,270.0 +480.0,310.0 +450.0,340.0 +420.0,365.0 +400.0,385.0 +380.0,400.0 +360.0,410.0 +340.0,415.0 +325.0,420.0 +310.0,425.0 +300.0,430.0 +290.0,435.0 +285.0,440.0 +280.0,445.0 +275.0,450.0 +270.0,455.0 +265.0,460.0 +260.0,465.0 +255.0,470.0 +250.0,475.0 +245.0,480.0 
+240.0,485.0 +235.0,490.0 +230.0,495.0 +225.0,500.0 +220.0,500.0 +215.0,500.0 +210.0,500.0 +205.0,500.0 +200.0,500.0 +195.0,500.0 +190.0,500.0 +185.0,500.0 +180.0,500.0 +175.0,500.0 +170.0,500.0 +165.0,500.0 +160.0,500.0 +155.0,500.0 +150.0,500.0 diff --git a/tests/prediction/time_series/test_data_plugins/real/SIR_4.csv b/tests/prediction/time_series/test_data_plugins/real/SIR_4.csv index 0bf57c0..38f11c5 100644 --- a/tests/prediction/time_series/test_data_plugins/real/SIR_4.csv +++ b/tests/prediction/time_series/test_data_plugins/real/SIR_4.csv @@ -1,51 +1,50 @@ Infected,Recovered -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -500.0,500.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 -400.0,400.0 \ No newline at end of file +495.0,5.0 +510.0,12.0 +525.0,25.0 +550.0,45.0 +580.0,75.0 +620.0,110.0 +600.0,150.0 +575.0,190.0 +540.0,230.0 +510.0,270.0 +480.0,310.0 +450.0,340.0 +420.0,365.0 +400.0,385.0 +380.0,400.0 +360.0,410.0 +340.0,415.0 +325.0,420.0 +310.0,425.0 +300.0,430.0 +290.0,435.0 +285.0,440.0 +280.0,445.0 +275.0,450.0 +270.0,455.0 +265.0,460.0 +260.0,465.0 +255.0,470.0 +250.0,475.0 +245.0,480.0 +240.0,485.0 +235.0,490.0 +230.0,495.0 +225.0,500.0 +220.0,500.0 +215.0,500.0 +210.0,500.0 +205.0,500.0 +200.0,500.0 +195.0,500.0 +190.0,500.0 +185.0,500.0 +180.0,500.0 +175.0,500.0 +170.0,500.0 +165.0,500.0 +160.0,500.0 +155.0,500.0 +150.0,500.0 diff --git a/tests/prediction/time_series/test_data_plugins/simulated/.gitignore 
b/tests/prediction/time_series/test_data_plugins/simulated/.gitignore index 3a40598..acd6131 100644 --- a/tests/prediction/time_series/test_data_plugins/simulated/.gitignore +++ b/tests/prediction/time_series/test_data_plugins/simulated/.gitignore @@ -1 +1 @@ -!.csv \ No newline at end of file +!.csv diff --git a/tests/prediction/time_series/test_data_plugins/simulated/SIR_0.csv b/tests/prediction/time_series/test_data_plugins/simulated/SIR_0.csv index 1f8fe27..b98bd58 100644 --- a/tests/prediction/time_series/test_data_plugins/simulated/SIR_0.csv +++ b/tests/prediction/time_series/test_data_plugins/simulated/SIR_0.csv @@ -1,51 +1,51 @@ Infected,Recovered -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 \ No newline at end of file +120.0,80.0 +250.0,150.0 +420.0,230.0 +650.0,350.0 +880.0,520.0 +950.0,750.0 +820.0,880.0 +650.0,950.0 +480.0,1020.0 +320.0,1080.0 +180.0,1120.0 +90.0,1150.0 +150.0,1050.0 +280.0,920.0 +450.0,750.0 +680.0,520.0 +850.0,350.0 +920.0,230.0 +850.0,150.0 +720.0,80.0 +580.0,50.0 +450.0,40.0 +350.0,50.0 +280.0,70.0 +220.0,90.0 +180.0,120.0 +150.0,150.0 +130.0,180.0 +120.0,200.0 +110.0,220.0 +100.0,240.0 +90.0,260.0 +80.0,280.0 +70.0,300.0 +60.0,320.0 +50.0,340.0 +40.0,360.0 +30.0,380.0 +20.0,400.0 +10.0,420.0 +5.0,440.0 +10.0,460.0 +15.0,480.0 +20.0,500.0 +25.0,520.0 +30.0,540.0 +35.0,560.0 +40.0,580.0 +45.0,600.0 +50.0,620.0 diff --git 
a/tests/prediction/time_series/test_data_plugins/simulated/SIR_1.csv b/tests/prediction/time_series/test_data_plugins/simulated/SIR_1.csv index 1f8fe27..b98bd58 100644 --- a/tests/prediction/time_series/test_data_plugins/simulated/SIR_1.csv +++ b/tests/prediction/time_series/test_data_plugins/simulated/SIR_1.csv @@ -1,51 +1,51 @@ Infected,Recovered -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 \ No newline at end of file +120.0,80.0 +250.0,150.0 +420.0,230.0 +650.0,350.0 +880.0,520.0 +950.0,750.0 +820.0,880.0 +650.0,950.0 +480.0,1020.0 +320.0,1080.0 +180.0,1120.0 +90.0,1150.0 +150.0,1050.0 +280.0,920.0 +450.0,750.0 +680.0,520.0 +850.0,350.0 +920.0,230.0 +850.0,150.0 +720.0,80.0 +580.0,50.0 +450.0,40.0 +350.0,50.0 +280.0,70.0 +220.0,90.0 +180.0,120.0 +150.0,150.0 +130.0,180.0 +120.0,200.0 +110.0,220.0 +100.0,240.0 +90.0,260.0 +80.0,280.0 +70.0,300.0 +60.0,320.0 +50.0,340.0 +40.0,360.0 +30.0,380.0 +20.0,400.0 +10.0,420.0 +5.0,440.0 +10.0,460.0 +15.0,480.0 +20.0,500.0 +25.0,520.0 +30.0,540.0 +35.0,560.0 +40.0,580.0 +45.0,600.0 +50.0,620.0 diff --git a/tests/prediction/time_series/test_data_plugins/simulated/SIR_2.csv b/tests/prediction/time_series/test_data_plugins/simulated/SIR_2.csv index 1f8fe27..b98bd58 100644 --- a/tests/prediction/time_series/test_data_plugins/simulated/SIR_2.csv +++ b/tests/prediction/time_series/test_data_plugins/simulated/SIR_2.csv @@ -1,51 
+1,51 @@ Infected,Recovered -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 \ No newline at end of file +120.0,80.0 +250.0,150.0 +420.0,230.0 +650.0,350.0 +880.0,520.0 +950.0,750.0 +820.0,880.0 +650.0,950.0 +480.0,1020.0 +320.0,1080.0 +180.0,1120.0 +90.0,1150.0 +150.0,1050.0 +280.0,920.0 +450.0,750.0 +680.0,520.0 +850.0,350.0 +920.0,230.0 +850.0,150.0 +720.0,80.0 +580.0,50.0 +450.0,40.0 +350.0,50.0 +280.0,70.0 +220.0,90.0 +180.0,120.0 +150.0,150.0 +130.0,180.0 +120.0,200.0 +110.0,220.0 +100.0,240.0 +90.0,260.0 +80.0,280.0 +70.0,300.0 +60.0,320.0 +50.0,340.0 +40.0,360.0 +30.0,380.0 +20.0,400.0 +10.0,420.0 +5.0,440.0 +10.0,460.0 +15.0,480.0 +20.0,500.0 +25.0,520.0 +30.0,540.0 +35.0,560.0 +40.0,580.0 +45.0,600.0 +50.0,620.0 diff --git a/tests/prediction/time_series/test_data_plugins/simulated/SIR_3.csv b/tests/prediction/time_series/test_data_plugins/simulated/SIR_3.csv index 1f8fe27..b98bd58 100644 --- a/tests/prediction/time_series/test_data_plugins/simulated/SIR_3.csv +++ b/tests/prediction/time_series/test_data_plugins/simulated/SIR_3.csv @@ -1,51 +1,51 @@ Infected,Recovered -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 
-300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 \ No newline at end of file +120.0,80.0 +250.0,150.0 +420.0,230.0 +650.0,350.0 +880.0,520.0 +950.0,750.0 +820.0,880.0 +650.0,950.0 +480.0,1020.0 +320.0,1080.0 +180.0,1120.0 +90.0,1150.0 +150.0,1050.0 +280.0,920.0 +450.0,750.0 +680.0,520.0 +850.0,350.0 +920.0,230.0 +850.0,150.0 +720.0,80.0 +580.0,50.0 +450.0,40.0 +350.0,50.0 +280.0,70.0 +220.0,90.0 +180.0,120.0 +150.0,150.0 +130.0,180.0 +120.0,200.0 +110.0,220.0 +100.0,240.0 +90.0,260.0 +80.0,280.0 +70.0,300.0 +60.0,320.0 +50.0,340.0 +40.0,360.0 +30.0,380.0 +20.0,400.0 +10.0,420.0 +5.0,440.0 +10.0,460.0 +15.0,480.0 +20.0,500.0 +25.0,520.0 +30.0,540.0 +35.0,560.0 +40.0,580.0 +45.0,600.0 +50.0,620.0 diff --git a/tests/prediction/time_series/test_data_plugins/simulated/SIR_4.csv b/tests/prediction/time_series/test_data_plugins/simulated/SIR_4.csv index 1f8fe27..b98bd58 100644 --- a/tests/prediction/time_series/test_data_plugins/simulated/SIR_4.csv +++ b/tests/prediction/time_series/test_data_plugins/simulated/SIR_4.csv @@ -1,51 +1,51 @@ Infected,Recovered -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 -700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 -100.0,100.0 -200.0,200.0 -300.0,300.0 -400.0,400.0 -500.0,500.0 -600.0,600.0 
-700.0,700.0 -800.0,800.0 -900.0,900.0 -1000.0,1000.0 \ No newline at end of file +120.0,80.0 +250.0,150.0 +420.0,230.0 +650.0,350.0 +880.0,520.0 +950.0,750.0 +820.0,880.0 +650.0,950.0 +480.0,1020.0 +320.0,1080.0 +180.0,1120.0 +90.0,1150.0 +150.0,1050.0 +280.0,920.0 +450.0,750.0 +680.0,520.0 +850.0,350.0 +920.0,230.0 +850.0,150.0 +720.0,80.0 +580.0,50.0 +450.0,40.0 +350.0,50.0 +280.0,70.0 +220.0,90.0 +180.0,120.0 +150.0,150.0 +130.0,180.0 +120.0,200.0 +110.0,220.0 +100.0,240.0 +90.0,260.0 +80.0,280.0 +70.0,300.0 +60.0,320.0 +50.0,340.0 +40.0,360.0 +30.0,380.0 +20.0,400.0 +10.0,420.0 +5.0,440.0 +10.0,460.0 +15.0,480.0 +20.0,500.0 +25.0,520.0 +30.0,540.0 +35.0,560.0 +40.0,580.0 +45.0,600.0 +50.0,620.0 diff --git a/tests/sbml_parser/test_main_parser.py b/tests/sbml_parser/test_main_parser.py index 70edd1a..adabc83 100644 --- a/tests/sbml_parser/test_main_parser.py +++ b/tests/sbml_parser/test_main_parser.py @@ -9,7 +9,7 @@ class TestMainSBMLParser: """Test the main SBML parser functionality.""" - + def test_init(self): """Test parser initialization.""" parser = MainSBMLParser("test_file.xml") @@ -22,7 +22,7 @@ def test_supported_versions(self): """Test that supported versions are correctly defined.""" expected_versions = { (2, 4): "level_2.parser", - (2, 5): "level_2.parser", + (2, 5): "level_2.parser", (3, 1): "level_3.parser", (3, 2): "level_3.parser" } @@ -31,29 +31,29 @@ def test_supported_versions(self): def test_get_parser_module_supported(self): """Test getting parser module for supported versions.""" parser = MainSBMLParser("test.xml") - + # Test Level 2 versions assert parser.get_parser_module(2, 4) == "level_2.parser" assert parser.get_parser_module(2, 5) == "level_2.parser" - - # Test Level 3 versions + + # Test Level 3 versions assert parser.get_parser_module(3, 1) == "level_3.parser" assert parser.get_parser_module(3, 2) == "level_3.parser" def test_get_parser_module_unsupported(self): """Test error for unsupported versions.""" parser = 
MainSBMLParser("test.xml") - + with pytest.raises(UnsupportedSBMLVersionError) as excinfo: parser.get_parser_module(1, 2) - + assert "Level 1 Version 2 is not supported" in str(excinfo.value) assert "Supported versions:" in str(excinfo.value) def test_detect_version_and_level_invalid_file(self): """Test error handling for invalid file.""" parser = MainSBMLParser("nonexistent_file.xml") - + with pytest.raises(SBMLParsingError): parser.detect_version_and_level() @@ -67,27 +67,27 @@ def test_validate_ode_model_no_reactions(self): - ''' - + with tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False) as f: f.write(sbml_content) f.flush() - + try: parser = MainSBMLParser(f.name) level, version, model = parser.detect_version_and_level() - + # Should not raise error, just log warning parser.validate_ode_model(model) - + assert level == 3 assert version == 1 assert model is not None - + finally: os.unlink(f.name) @@ -100,9 +100,9 @@ def test_detect_version_level_with_valid_sbml(self): - - @@ -129,29 +129,29 @@ def test_detect_version_level_with_valid_sbml(self): ''' - + with tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False) as f: f.write(sbml_content) f.flush() - + try: parser = MainSBMLParser(f.name) level, version, model = parser.detect_version_and_level() - + assert level == 3 assert version == 2 assert model is not None assert parser.level == 3 assert parser.version == 2 assert parser.model is not None - + finally: os.unlink(f.name) class TestSBMLParsingIntegration: """Integration tests for SBML parsing.""" - + def create_test_sbml_file(self, level, version, content_additions=""): """Helper to create test SBML files.""" base_content = f''' @@ -161,7 +161,7 @@ def create_test_sbml_file(self, level, version, content_additions=""): - @@ -184,7 +184,7 @@ def create_test_sbml_file(self, level, version, content_additions=""): {content_additions} ''' - + temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False) 
temp_file.write(base_content) temp_file.flush() @@ -194,64 +194,64 @@ def create_test_sbml_file(self, level, version, content_additions=""): def test_level_2_version_4_parsing(self): """Test parsing Level 2 Version 4 files.""" test_file = self.create_test_sbml_file(2, 4) - + try: parser = MainSBMLParser(test_file) result = parser.process() - + assert isinstance(result, dict) assert 'sbml_info' in result assert result['sbml_info']['level'] == 2 assert result['sbml_info']['version'] == 4 - + finally: os.unlink(test_file) def test_level_2_version_5_parsing(self): """Test parsing Level 2 Version 5 files.""" test_file = self.create_test_sbml_file(2, 5) - + try: parser = MainSBMLParser(test_file) result = parser.process() - + assert isinstance(result, dict) assert 'sbml_info' in result assert result['sbml_info']['level'] == 2 assert result['sbml_info']['version'] == 5 - + finally: os.unlink(test_file) def test_level_3_version_1_parsing(self): """Test parsing Level 3 Version 1 files.""" test_file = self.create_test_sbml_file(3, 1) - + try: parser = MainSBMLParser(test_file) result = parser.process() - + assert isinstance(result, dict) assert 'sbml_info' in result assert result['sbml_info']['level'] == 3 assert result['sbml_info']['version'] == 1 - + finally: os.unlink(test_file) def test_level_3_version_2_parsing(self): """Test parsing Level 3 Version 2 files.""" test_file = self.create_test_sbml_file(3, 2) - + try: parser = MainSBMLParser(test_file) result = parser.process() - + assert isinstance(result, dict) assert 'sbml_info' in result assert result['sbml_info']['level'] == 3 assert result['sbml_info']['version'] == 2 - + finally: os.unlink(test_file) @@ -266,19 +266,19 @@ def test_unsupported_version_raises_error(self): ''' - + with tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False) as f: f.write(sbml_content) f.flush() - + try: parser = MainSBMLParser(f.name) - + with pytest.raises(UnsupportedSBMLVersionError) as excinfo: parser.process() - + assert 
"Level 1 Version 2 is not supported" in str(excinfo.value) - + finally: os.unlink(f.name) @@ -292,17 +292,17 @@ def test_malformed_sbml_raises_error(self): ''' - + with tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False) as f: f.write(malformed_content) f.flush() - + try: parser = MainSBMLParser(f.name) # Should not raise error during parsing, but might log warnings result = parser.process() assert isinstance(result, dict) - + finally: os.unlink(f.name) @@ -312,4 +312,4 @@ def teardown_method(self): if __name__ == "__main__": - pytest.main([__file__]) \ No newline at end of file + pytest.main([__file__]) From 8a1c2de727505255b66ee97b0cc003511981b3a4 Mon Sep 17 00:00:00 2001 From: YagmurSimsekk Date: Sun, 28 Sep 2025 12:55:00 +0200 Subject: [PATCH 3/7] fixes SBML parameter handling - fixes SBMLSystemModel parameter naming to use SBML names instead of artificial prefixes - resolve local parameter conflicts with global parameters in kinetic law evaluation --- simba_ml/sbml_parser/level_2/parser.py | 21 +- .../system_model/sbml_system_model.py | 583 ++++++++++++++++++ 2 files changed, 603 insertions(+), 1 deletion(-) create mode 100644 simba_ml/simulation/system_model/sbml_system_model.py diff --git a/simba_ml/sbml_parser/level_2/parser.py b/simba_ml/sbml_parser/level_2/parser.py index 687c473..d12eb9b 100644 --- a/simba_ml/sbml_parser/level_2/parser.py +++ b/simba_ml/sbml_parser/level_2/parser.py @@ -41,7 +41,8 @@ def parse(self): 'parameters': self._parse_parameters(), 'compartments': self._parse_compartments(), 'rules': self._parse_rules(), - 'initial_assignments': self._parse_initial_assignments() + 'initial_assignments': self._parse_initial_assignments(), + 'function_definitions': self._parse_function_definitions() } return parsed_data @@ -293,3 +294,21 @@ def _get_rule_type_name(self, type_code): 23: 'rate' # SBML_RATE_RULE } return type_names.get(type_code, 'unknown') + + def _parse_function_definitions(self): + """Parse function definitions.""" + 
function_defs = [] + + for i in range(self.model.getNumFunctionDefinitions()): + func_def = self.model.getFunctionDefinition(i) + func_data = { + 'id': func_def.getId(), + 'name': func_def.getName() if func_def.isSetName() else func_def.getId(), + 'formula': formulaToString(func_def.getMath()) if func_def.isSetMath() else None, + 'math': formulaToString(func_def.getMath()) if func_def.isSetMath() else None, + 'notes': self._get_notes(func_def), + 'sbo_term': func_def.getSBOTermID() if func_def.isSetSBOTerm() else None + } + function_defs.append(func_data) + + return function_defs diff --git a/simba_ml/simulation/system_model/sbml_system_model.py b/simba_ml/simulation/system_model/sbml_system_model.py new file mode 100644 index 0000000..c4c31d8 --- /dev/null +++ b/simba_ml/simulation/system_model/sbml_system_model.py @@ -0,0 +1,583 @@ +"""SBML-based system model that implements the SystemModelInterface. + +This module provides a bridge between parsed SBML models and SimbaML's +system model framework, allowing SBML models to be used directly with +existing simulation and ML pipelines. 
+""" + +import typing +import logging +import numpy as np +import pandas as pd +from pathlib import Path + +from simba_ml.simulation.system_model import system_model_interface +from simba_ml.simulation.system_model import system_model +from simba_ml.simulation import species +from simba_ml.simulation import kinetic_parameters as kinetic_parameters_module +from simba_ml.simulation import distributions +from simba_ml.simulation.sparsifier import no_sparsifier +from simba_ml.simulation import noisers +from simba_ml.sbml_parser.main_parser import MainSBMLParser +from simba_ml.sbml_parser.ml_exporter import SBMLExporter + +logger = logging.getLogger(__name__) + + +class SBMLSystemModel(system_model.SystemModel): + """SystemModel implementation based on parsed SBML models.""" + + def __init__( + self, + sbml_file_path: typing.Optional[str] = None, + name: typing.Optional[str] = None, + sparsifier: typing.Optional[typing.Any] = None, + noiser: typing.Optional[noisers.Noiser] = None, + parameter_distributions: typing.Optional[dict] = None, + species_distributions: typing.Optional[dict] = None, + parsed_data: typing.Optional[dict] = None, + ml_exporter: typing.Optional[typing.Any] = None + ): + """Initialize SBML system model. 
+ + Args: + sbml_file_path: Path to SBML model file (required if parsed_data is None) + name: Model name (defaults to SBML model name) + sparsifier: Sparsifier for output processing + noiser: Noiser for output processing + parameter_distributions: Custom parameter distributions + species_distributions: Custom species initial value distributions + parsed_data: Parsed SBML data + ml_exporter: SBMLExporter class + """ + # Use already-parsed data if provided, otherwise parse file + if parsed_data is not None and ml_exporter is not None: + self.sbml_data = parsed_data + self.ml_exporter = ml_exporter + self.sbml_file_path = parsed_data.get('metadata', {}).get('sbml_file_path', 'parsed_data') + else: + # Legacy workflow: parse from file + if sbml_file_path is None: + raise ValueError("Either sbml_file_path or (parsed_data + ml_exporter) must be provided") + self.sbml_file_path = sbml_file_path + self.parser = MainSBMLParser(sbml_file_path) + self.sbml_data = self.parser.process() + self.ml_exporter = SBMLExporter(self.sbml_data) + + # Set model name + fallback_name = Path(self.sbml_file_path).stem if self.sbml_file_path else 'SBML_Model' + model_name = name or self.sbml_data['sbml_info'].get('model_name', fallback_name) + + # Build species and parameters from parsed data + built_species = self._build_species(species_distributions) + built_parameters = self._build_kinetic_parameters(parameter_distributions) + built_deriv = self._build_derivative_function() + + # Initialize parent SystemModel with required parameters + super().__init__( + name=model_name, + specieses=list(built_species.values()), + kinetic_parameters=built_parameters, + deriv=built_deriv, + sparsifier=sparsifier or no_sparsifier.NoSparsifier(), + noiser=noiser or noisers.NoNoiser() + ) + + # Keep SBML-specific references for internal use + self._specieses = built_species + self._kinetic_parameters = built_parameters + self._deriv = built_deriv + + logger.info(f"Created SBML system model '{model_name}' with 
{len(built_species)} species") + + def _build_species(self, custom_distributions: typing.Optional[dict] = None) -> dict[str, species.Species]: + """Build species from SBML data.""" + species_dict = {} + custom_distributions = custom_distributions or {} + + for sp_data in self.sbml_data['species']: + sp_id = sp_data['id'] + sp_name = sp_data.get('name', sp_id) + + # Get initial value + initial_value = 1.0 + if sp_data.get('initial_concentration') is not None: + initial_value = sp_data['initial_concentration'] + elif sp_data.get('initial_amount') is not None: + initial_value = sp_data['initial_amount'] + + # Create distribution for initial values + if sp_id in custom_distributions: + initial_distribution = custom_distributions[sp_id] + else: + # Default: lognormal around initial value + if initial_value > 0: + initial_distribution = distributions.LogNormalDistribution( + mu=np.log(initial_value), + sigma=0.3 # 30% variability + ) + else: + initial_distribution = distributions.Constant(1e-6) + + # Determine if species should be in output + is_boundary = sp_data.get('boundary_condition', False) + is_constant = sp_data.get('constant', False) + contained_in_output = not (is_boundary or is_constant) + + species_obj = species.Species( + name=sp_id, + distribution=initial_distribution, + contained_in_output=contained_in_output + ) + + species_dict[sp_id] = species_obj + + return species_dict + + def _build_kinetic_parameters( + self, + custom_distributions: typing.Optional[dict] = None + ) -> dict[str, kinetic_parameters_module.KineticParameter]: + """Build kinetic parameters from SBML data.""" + params_dict = {} + custom_distributions = custom_distributions or {} + + # Global parameters + for param_data in self.sbml_data['parameters']: + param_id = param_data['id'] + param_value = param_data.get('value', 1.0) + + if param_id in custom_distributions: + distribution = custom_distributions[param_id] + else: + # Default: lognormal around parameter value + if param_value > 0: + 
distribution = distributions.LogNormalDistribution( + mu=np.log(param_value), + sigma=0.2 # 20% variability + ) + else: + distribution = distributions.Constant(param_value) + + params_dict[param_id] = kinetic_parameters_module.ConstantKineticParameter( + distribution=distribution + ) + + # Note: Local parameters are NOT stored globally to avoid conflicts + # They are handled individually for each reaction during rate evaluation + + return params_dict + + def _build_derivative_function(self) -> typing.Callable: + """Build derivative function from SBML reaction network.""" + + # Get dynamic species (non-boundary, non-constant) + dynamic_species = [] + for sp_data in self.sbml_data['species']: + is_boundary = sp_data.get('boundary_condition', False) + is_constant = sp_data.get('constant', False) + if not (is_boundary or is_constant): + dynamic_species.append(sp_data['id']) + + species_to_index = {sp_id: i for i, sp_id in enumerate(dynamic_species)} + + def derivative_func(t: float, y: list[float], kinetic_params: dict) -> tuple[float, ...]: + """Compute derivatives for dynamic species.""" + + # Create species concentration dictionary + species_conc = {} + for i, sp_id in enumerate(dynamic_species): + species_conc[sp_id] = max(y[i], 0.0) # Ensure non-negative + + # Add boundary species (from initial values) + for sp_data in self.sbml_data['species']: + if sp_data.get('boundary_condition', False): + sp_id = sp_data['id'] + initial_value = 1.0 + if sp_data.get('initial_concentration') is not None: + initial_value = sp_data['initial_concentration'] + elif sp_data.get('initial_amount') is not None: + initial_value = sp_data['initial_amount'] + species_conc[sp_id] = initial_value + + # Compute derivatives + dydt = [0.0] * len(dynamic_species) + + for reaction in self.sbml_data['reactions']: + if not reaction.get('kinetic_law'): + continue + + # Get rate formula + kinetic_law = reaction['kinetic_law'] + rate_formula = None + if kinetic_law.get('math'): + rate_formula = 
kinetic_law['math'] + elif kinetic_law.get('formula'): + rate_formula = kinetic_law['formula'] + + if not rate_formula: + continue + + # Evaluate reaction rate + try: + # Prepare local parameters (now using direct names) + local_params = {} + if kinetic_law.get('parameters'): + for local_param in kinetic_law['parameters']: + param_name = local_param['id'] + if param_name in kinetic_params: + local_params[param_name] = kinetic_params[param_name] + else: + local_params[param_name] = local_param.get('value', 1.0) + + rate = self._evaluate_rate_formula( + rate_formula, + species_conc, + kinetic_params, + local_params + ) + + # Apply stoichiometry to dynamic species + for reactant in reaction.get('reactants', []): + sp_id = reactant['species'] + if sp_id in species_to_index: + stoich = reactant.get('stoichiometry', 1.0) + dydt[species_to_index[sp_id]] -= stoich * rate + + for product in reaction.get('products', []): + sp_id = product['species'] + if sp_id in species_to_index: + stoich = product.get('stoichiometry', 1.0) + dydt[species_to_index[sp_id]] += stoich * rate + + except Exception as e: + logger.debug(f"Error evaluating rate for reaction {reaction['id']}: {e}") + continue + + # Process rate rules (direct ODE specification) + for rule in self.sbml_data.get('rules', []): + if rule.get('type') != 'rate': + continue + + variable = rule.get('variable') + if not variable or variable not in species_to_index: + continue + + rate_formula = rule.get('formula') or rule.get('math') + if not rate_formula: + continue + + try: + rate = self._evaluate_rate_formula( + rate_formula, + species_conc, + kinetic_params, + {} # Rate rules don't have local parameters + ) + # Rate rules directly specify dx/dt for the variable + dydt[species_to_index[variable]] = rate + except Exception as e: + logger.debug(f"Error evaluating rate rule for {variable}: {e}") + continue + + return tuple(dydt) + + return derivative_func + + def _evaluate_rate_formula( + self, + formula: str, + species_conc: 
dict, + kinetic_params: dict, + local_params: dict + ) -> float: + """Evaluate a rate formula with given concentrations and parameters.""" + + # Create evaluation context + eval_context = {} + eval_context.update(species_conc) + eval_context.update(kinetic_params) + eval_context.update(local_params) + + # Add global parameters from SBML + for param_data in self.sbml_data['parameters']: + param_id = param_data['id'] + if param_id not in eval_context: + eval_context[param_id] = param_data.get('value', 1.0) + + # Add compartments to evaluation context + for comp_data in self.sbml_data.get('compartments', []): + comp_id = comp_data['id'] + if comp_id not in eval_context: + eval_context[comp_id] = comp_data.get('size', 1.0) + + try: + # Substitute function calls with their mathematical expressions + expanded_formula = self._expand_function_calls(formula) + result = self._safe_evaluate_formula(expanded_formula, eval_context) + return float(result) if result is not None else 0.0 + except Exception as e: + logger.debug(f"Error evaluating formula '{formula}': {e}") + return 0.0 + + def _expand_function_calls(self, formula: str) -> str: + """Expand function calls in a formula to their mathematical expressions.""" + import re + + # Find function calls like function_1(arg1, arg2, ...) 
+ function_pattern = r'(function_\d+)\s*\((.*?)\)' + + def replace_function(match): + func_name = match.group(1) + args_str = match.group(2) + + # Find the function definition + func_def = None + for f in self.sbml_data.get('function_definitions', []): + if f['id'] == func_name: + func_def = f + break + + if not func_def: + logger.warning(f"Function {func_name} not found, returning 1.0") + return "1.0" + + func_formula = func_def.get('formula', func_def.get('math', '')) + if not func_formula: + logger.warning(f"No formula found for function {func_name}, returning 1.0") + return "1.0" + + # Parse lambda function: lambda(arg1, arg2, ..., expression) + if func_formula.startswith('lambda('): + # Extract lambda arguments and body + lambda_content = func_formula[7:] # Remove 'lambda(' + if lambda_content.endswith(')'): + lambda_content = lambda_content[:-1] # Remove closing ')' + + # Split arguments from expression (last comma separates them) + parts = lambda_content.split(',') + if len(parts) < 2: + logger.warning(f"Invalid lambda function {func_name}, returning 1.0") + return "1.0" + + # Arguments are all but the last part + lambda_args = [arg.strip() for arg in parts[:-1]] + # Expression is the last part + expression = parts[-1].strip() + + # Parse the actual arguments passed to the function + actual_args = [arg.strip() for arg in args_str.split(',')] + + if len(actual_args) != len(lambda_args): + logger.warning(f"Argument count mismatch for {func_name}: expected {len(lambda_args)}, got {len(actual_args)}") + return "1.0" + + # Substitute arguments in the expression + substituted_expr = expression + for lambda_arg, actual_arg in zip(lambda_args, actual_args): + # Use word boundaries to avoid partial matches + substituted_expr = re.sub(r'\b' + re.escape(lambda_arg) + r'\b', actual_arg, substituted_expr) + + return f"({substituted_expr})" + + else: + logger.warning(f"Non-lambda function {func_name} not supported, returning 1.0") + return "1.0" + + # Replace all function 
calls + expanded = re.sub(function_pattern, replace_function, formula) + + return expanded + + def _safe_evaluate_formula(self, formula: str, context: dict) -> float: + try: + import numexpr as ne + # numexpr requires all variables to be arrays, so convert to scalars + for var, val in context.items(): + if isinstance(val, (int, float)): + context[var] = float(val) + return ne.evaluate(formula, local_dict=context) + except ImportError: + # Fallback: Use restricted eval with only math operations allowed + import math + import ast + import operator + + # Define allowed operations and functions + allowed_ops = { + ast.Add: operator.add, + ast.Sub: operator.sub, + ast.Mult: operator.mul, + ast.Div: operator.truediv, + ast.Pow: operator.pow, + ast.USub: operator.neg, + ast.UAdd: operator.pos, + } + + allowed_funcs = { + 'exp': math.exp, + 'log': math.log, + 'sqrt': math.sqrt, + 'sin': math.sin, + 'cos': math.cos, + 'tan': math.tan, + 'pow': pow, + 'abs': abs, + } + + allowed_constants = { + 'pi': math.pi, + 'e': math.e, + } + + def safe_eval_node(node): + if isinstance(node, ast.Num): # number + return node.n + elif isinstance(node, ast.Constant): # Python 3.8+ + return node.value + elif isinstance(node, ast.Name): # variable + var_name = node.id + if var_name in context: + return context[var_name] + elif var_name in allowed_constants: + return allowed_constants[var_name] + else: + raise ValueError(f"Unknown variable: {var_name}") + elif isinstance(node, ast.BinOp): # binary operation + left = safe_eval_node(node.left) + right = safe_eval_node(node.right) + op = allowed_ops.get(type(node.op)) + if op: + return op(left, right) + else: + raise ValueError(f"Unsupported operation: {type(node.op)}") + elif isinstance(node, ast.UnaryOp): # unary operation + operand = safe_eval_node(node.operand) + op = allowed_ops.get(type(node.op)) + if op: + return op(operand) + else: + raise ValueError(f"Unsupported unary operation: {type(node.op)}") + elif isinstance(node, ast.Call): # 
function call + func_name = node.func.id + if func_name in allowed_funcs: + args = [safe_eval_node(arg) for arg in node.args] + return allowed_funcs[func_name](*args) + else: + raise ValueError(f"Unsupported function: {func_name}") + else: + raise ValueError(f"Unsupported node type: {type(node)}") + + try: + tree = ast.parse(formula, mode='eval') + return safe_eval_node(tree.body) + except (SyntaxError, ValueError) as e: + logger.debug(f"Safe evaluation failed for '{formula}': {e}") + # Last resort: return 1.0 as default rate + return 1.0 + + + def apply_sparsifier(self, signal: pd.DataFrame) -> pd.DataFrame: + """Apply sparsification to the signal.""" + # Filter to output species only + output_species = [ + sp_id for sp_id, sp_obj in self._specieses.items() + if sp_obj.contained_in_output + ] + + filtered_signal = signal[output_species] if output_species else signal + return self.sparsifier.sparsify(filtered_signal) + + def get_clean_signal( + self, + start_values: dict[str, typing.Any], + sample_id: int, + deriv_noised: bool = True + ) -> pd.DataFrame: + """Generate a clean signal using time-series simulation. + + Note: This is mainly for compatibility. For steady-state generation, + use the numerical solvers directly. 
+ """ + from scipy import integrate + + # Get time points + timestamps = start_values.get("timestamps", [1000])[sample_id] + t = np.linspace(0, timestamps, int(timestamps) + 1) + + # Get initial conditions for dynamic species + dynamic_species = [ + sp_id for sp_id, sp_obj in self._specieses.items() + if sp_obj.contained_in_output or not any([ + sp_data.get('boundary_condition', False) or sp_data.get('constant', False) + for sp_data in self.sbml_data['species'] if sp_data['id'] == sp_id + ]) + ] + + y0 = [ + start_values["specieses"][sp_id][sample_id] + for sp_id in dynamic_species + ] + + # Get kinetic parameters + kinetic_params = { + name: param.get_at_timestamp(sample_id, 0.0) + for name, param in self._kinetic_parameters.items() + } + + # Solve ODE + try: + result = integrate.solve_ivp( + fun=lambda t, y: self._deriv(t, y.tolist(), kinetic_params), + y0=y0, + t_span=(t[0], t[-1]), + t_eval=t, + method='LSODA', + atol=1e-6, + rtol=1e-3 + ) + + # Convert to DataFrame with dynamic species + signal_df = pd.DataFrame( + result.y.T, + columns=dynamic_species + ) + + # Add boundary species (they remain constant) + for sp_data in self.sbml_data['species']: + if sp_data.get('boundary_condition', False): + sp_id = sp_data['id'] + # Get boundary species initial value + if sp_id in start_values["specieses"]: + boundary_value = start_values["specieses"][sp_id][sample_id] + else: + # Use SBML initial value + boundary_value = sp_data.get('initial_concentration', + sp_data.get('initial_amount', 1.0)) + if boundary_value is None: + boundary_value = 1.0 + + # Add constant column for boundary species + signal_df[sp_id] = boundary_value + + return signal_df + + except Exception as e: + logger.error(f"Error in get_clean_signal: {e}") + # Return default DataFrame with dynamic species + signal_df = pd.DataFrame({sp_id: [y0[i]] for i, sp_id in enumerate(dynamic_species)}) + + # Add boundary species to default DataFrame too + for sp_data in self.sbml_data['species']: + if 
sp_data.get('boundary_condition', False): + sp_id = sp_data['id'] + if sp_id in start_values["specieses"]: + boundary_value = start_values["specieses"][sp_id][sample_id] + else: + boundary_value = sp_data.get('initial_concentration', + sp_data.get('initial_amount', 1.0)) + if boundary_value is None: + boundary_value = 1.0 + signal_df[sp_id] = boundary_value + + return signal_df From c23067194ddaa824ce6f1b7811b80173d15cb08d Mon Sep 17 00:00:00 2001 From: YagmurSimsekk Date: Sun, 5 Oct 2025 13:17:12 +0200 Subject: [PATCH 4/7] Add compute_steady_state() high-level API to SBMLSystemModel Supports multiple solver methods: lsoda, scipy, newton, bounded --- .../generators/steady_state_generator.py | 5 +- .../system_model/sbml_system_model.py | 218 +++++++++++++++++- 2 files changed, 221 insertions(+), 2 deletions(-) diff --git a/simba_ml/simulation/generators/steady_state_generator.py b/simba_ml/simulation/generators/steady_state_generator.py index eb9e7e7..ffcffaa 100644 --- a/simba_ml/simulation/generators/steady_state_generator.py +++ b/simba_ml/simulation/generators/steady_state_generator.py @@ -37,7 +37,7 @@ def _is_similar(self, series1: pd.Series, series2: pd.Series) -> bool: raise ValueError("Series have different lengths.") return all( - math.isclose(series1[i], series2[i], rel_tol=1e-05) + math.isclose(series1.iloc[i], series2.iloc[i], rel_tol=1e-05) for i in range(len(series1)) ) @@ -50,6 +50,9 @@ def __check_if_signal_has_steady_state(self, signal: pd.DataFrame) -> bool: Returns: True if the signal has a steady state, False otherwise. 
""" + # Check if signal has at least 2 rows to compare + if len(signal) < 2: + return False return self._is_similar(signal.iloc[-1], signal.iloc[-2]) def __add_parameters_to_table( diff --git a/simba_ml/simulation/system_model/sbml_system_model.py b/simba_ml/simulation/system_model/sbml_system_model.py index c4c31d8..065ae61 100644 --- a/simba_ml/simulation/system_model/sbml_system_model.py +++ b/simba_ml/simulation/system_model/sbml_system_model.py @@ -168,6 +168,80 @@ def _build_kinetic_parameters( return params_dict + def _build_exact_kinetic_parameters(self) -> dict[str, kinetic_parameters_module.KineticParameter]: + """Build kinetic parameters using exact SBML values (no distributions).""" + params_dict = {} + + # Global parameters - use exact values + for param_data in self.sbml_data['parameters']: + param_id = param_data['id'] + param_value = param_data.get('value', 1.0) + + # Use Constant distribution for exact values + distribution = distributions.Constant(param_value) + params_dict[param_id] = kinetic_parameters_module.ConstantKineticParameter( + distribution=distribution + ) + + return params_dict + + def _build_exact_species(self) -> dict[str, species.Species]: + """Build species using exact SBML initial values (no distributions).""" + species_dict = {} + + for sp_data in self.sbml_data['species']: + sp_id = sp_data['id'] + + # Get exact initial value + initial_value = 1.0 + if sp_data.get('initial_concentration') is not None: + initial_value = sp_data['initial_concentration'] + elif sp_data.get('initial_amount') is not None: + initial_value = sp_data['initial_amount'] + + # Use Constant distribution for exact values + initial_distribution = distributions.Constant(initial_value) + + # Determine if species should be in output + is_boundary = sp_data.get('boundary_condition', False) + is_constant = sp_data.get('constant', False) + contained_in_output = not (is_boundary or is_constant) + + species_obj = species.Species( + name=sp_id, + 
distribution=initial_distribution, + contained_in_output=contained_in_output + ) + + species_dict[sp_id] = species_obj + + return species_dict + + def use_exact_sbml_values(self): + """Switch to using exact SBML values instead of distributions.""" + print("🔧 Switching SimbaML to use exact SBML values...") + + # Replace distributions with exact values + exact_params = self._build_exact_kinetic_parameters() + exact_species = self._build_exact_species() + + # Update internal references + self._kinetic_parameters = exact_params + self._specieses = exact_species + + # Update parent class attributes + super().__init__( + name=self.name, + specieses=list(exact_species.values()), + kinetic_parameters=exact_params, + deriv=self._deriv, + sparsifier=self.sparsifier, + noiser=self.noiser + ) + + print(f" ✅ Using exact values for {len(exact_params)} parameters") + print(f" ✅ Using exact values for {len(exact_species)} species") + def _build_derivative_function(self) -> typing.Callable: """Build derivative function from SBML reaction network.""" @@ -448,7 +522,14 @@ def safe_eval_node(node): right = safe_eval_node(node.right) op = allowed_ops.get(type(node.op)) if op: - return op(left, right) + try: + result = op(left, right) + # Check for overflow/invalid results + if not np.isfinite(result): + return 0.0 # Return 0 for overflow/NaN/inf + return result + except (OverflowError, ZeroDivisionError): + return 0.0 # Return 0 for overflow or division by zero else: raise ValueError(f"Unsupported operation: {type(node.op)}") elif isinstance(node, ast.UnaryOp): # unary operation @@ -581,3 +662,138 @@ def get_clean_signal( signal_df[sp_id] = boundary_value return signal_df + + def compute_steady_state( + self, + method: str = 'lsoda', + t_max: float = 10000, + atol: float = 1e-8, + rtol: float = 1e-6, + **solver_kwargs + ) -> typing.Dict[str, typing.Any]: + """Compute steady-state using exact SBML initial values and parameters. 
+ + This method provides a high-level API equivalent to Tellurium's steadyState(), + using the exact values from the SBML file without any sampling or variation. + + Args: + method: Solver method - 'lsoda' for ODE simulation, 'scipy'/'newton'/'bounded' for root-finding + t_max: Maximum integration time for ODE methods + atol: Absolute tolerance for ODE solver + rtol: Relative tolerance for ODE solver + **solver_kwargs: Additional solver-specific options + + Returns: + Dictionary containing: + - 'success': bool, whether computation succeeded + - 'values': np.ndarray of steady-state concentrations + - 'species': list of species names + - 'max_derivative': float, max |dx/dt| at steady-state + - 'message': str, diagnostic message + - 'method': str, method used + + Example: + >>> model = SBMLSystemModel(sbml_file_path='model.xml') + >>> result = model.compute_steady_state(method='lsoda') + >>> if result['success']: + >>> print(f"Steady-state: {result['values']}") + """ + from scipy import integrate + + try: + # Get exact species and parameters (no distributions) + exact_species = self._build_exact_species() + exact_params = self._build_exact_kinetic_parameters() + + # Build initial conditions for dynamic species only + y0 = [] + dynamic_species = [] + for sp_id, sp_obj in exact_species.items(): + if sp_obj.contained_in_output: # Only dynamic species + dynamic_species.append(sp_id) + # Extract value from Constant distribution + if hasattr(sp_obj.distribution, 'value'): + y0.append(sp_obj.distribution.value) + else: + y0.append(sp_obj.distribution.sample(1)[0]) + + # Build kinetic parameters dict (global params, local handled in _deriv) + params = {} + for param_id, param_obj in exact_params.items(): + if hasattr(param_obj.distribution, 'value'): + params[param_id] = param_obj.distribution.value + else: + params[param_id] = param_obj.distribution.sample(1)[0] + + # Solve using selected method + if method.lower() == 'lsoda': + # ODE simulation to steady-state + result = 
+ integrate.solve_ivp( + fun=lambda t, y: self._deriv(t, y.tolist(), params), + y0=y0, + t_span=(0, t_max), + method='LSODA', + atol=atol, + rtol=rtol + ) + + if not result.success: + return { + 'success': False, + 'values': np.array(y0), + 'species': dynamic_species, + 'max_derivative': np.inf, + 'message': f"LSODA failed: {result.message}", + 'method': method + } + + steady_state_values = result.y[:, -1] + final_derivs = self._deriv(t_max, steady_state_values.tolist(), params) + max_deriv = np.max(np.abs(final_derivs)) + + return { + 'success': True, + 'values': steady_state_values, + 'species': dynamic_species, + 'max_derivative': max_deriv, + 'message': f"Converged at t={t_max} with max|dx/dt|={max_deriv:.2e}", + 'method': method + } + + else: + # Numerical root-finding + from simba_ml.simulation import steady_state_solvers + + solution, success, message = steady_state_solvers.find_steady_state( + deriv_func=self._deriv, + initial_guess=y0, + kinetic_params=params, + solver_type=method, + **solver_kwargs + ) + + if success: + final_derivs = self._deriv(0, solution.tolist(), params) + max_deriv = np.max(np.abs(final_derivs)) + else: + max_deriv = np.inf + + return { + 'success': success, + 'values': solution, + 'species': dynamic_species, + 'max_derivative': max_deriv, + 'message': message, + 'method': method + } + + except Exception as e: + logger.error(f"Error in compute_steady_state: {e}") + return { + 'success': False, + 'values': np.array([]), + 'species': [], + 'max_derivative': np.inf, + 'message': f"Error: {str(e)}", + 'method': method + } From 5dc4f265c74def6529b721214f420879ab3826a9 Mon Sep 17 00:00:00 2001 From: YagmurSimsekk Date: Tue, 7 Oct 2025 17:05:58 +0200 Subject: [PATCH 5/7] Add local parameter support to steady-state datasets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Local parameters are now included in generated datasets but kept constant to avoid biologically impossible parameter combinations. 
Only species initial conditions are varied for dataset generation. Add function definition detection Add LSODA → bounded solver fallback for better convergence --- simba_ml/sbml_parser/level_2/parser.py | 3 +- simba_ml/sbml_parser/level_3/parser.py | 3 +- simba_ml/sbml_parser/main_parser.py | 8 ++ .../generators/steady_state_generator.py | 82 ++++++++++++------ .../system_model/sbml_system_model.py | 86 ++++++++++++++++++- 5 files changed, 151 insertions(+), 31 deletions(-) diff --git a/simba_ml/sbml_parser/level_2/parser.py b/simba_ml/sbml_parser/level_2/parser.py index d12eb9b..373c33e 100644 --- a/simba_ml/sbml_parser/level_2/parser.py +++ b/simba_ml/sbml_parser/level_2/parser.py @@ -63,7 +63,8 @@ def _get_sbml_info(self): 'num_species': self.model.getNumSpecies(), 'num_reactions': self.model.getNumReactions(), 'num_parameters': self.model.getNumParameters(), - 'num_compartments': self.model.getNumCompartments() + 'num_compartments': self.model.getNumCompartments(), + 'num_functions': self.model.getNumFunctionDefinitions() } def _parse_species(self): diff --git a/simba_ml/sbml_parser/level_3/parser.py b/simba_ml/sbml_parser/level_3/parser.py index 10a02eb..52f5a32 100644 --- a/simba_ml/sbml_parser/level_3/parser.py +++ b/simba_ml/sbml_parser/level_3/parser.py @@ -75,7 +75,8 @@ def _get_sbml_info(self): 'num_parameters': self.model.getNumParameters(), 'num_compartments': self.model.getNumCompartments(), 'num_events': self.model.getNumEvents() if hasattr(self.model, 'getNumEvents') else 0, - 'num_constraints': self.model.getNumConstraints() if hasattr(self.model, 'getNumConstraints') else 0 + 'num_constraints': self.model.getNumConstraints() if hasattr(self.model, 'getNumConstraints') else 0, + 'num_functions': self.model.getNumFunctionDefinitions() } def _parse_species(self): diff --git a/simba_ml/sbml_parser/main_parser.py b/simba_ml/sbml_parser/main_parser.py index b2ce963..2ba6c9a 100644 --- a/simba_ml/sbml_parser/main_parser.py +++ 
b/simba_ml/sbml_parser/main_parser.py @@ -88,6 +88,14 @@ def validate_ode_model(self, model): """ num_reactions = model.getListOfReactions().size() num_rules = model.getListOfRules().size() + num_functions = model.getListOfFunctionDefinitions().size() + + # Check for function definitions (unsupported feature) + if num_functions > 0: + logger.warning( + f"Model contains {num_functions} function definition(s). " + f"Function definitions are not yet supported and will cause errors." + ) # Check for rate rules (direct ODE specification) has_rate_rules = False diff --git a/simba_ml/simulation/generators/steady_state_generator.py b/simba_ml/simulation/generators/steady_state_generator.py index ffcffaa..d954f45 100644 --- a/simba_ml/simulation/generators/steady_state_generator.py +++ b/simba_ml/simulation/generators/steady_state_generator.py @@ -88,32 +88,64 @@ def __generate_steady_state( Raises: ValueError: if the generated signal has no steady state. """ - clean_signal = self.sm.get_clean_signal( - start_values=start_values, sample_id=sample_id - ) - - for key in start_values["specieses"]: - start_values["specieses"][key][sample_id] = clean_signal[key].iloc[-1] - - clean_signal = self.sm.get_clean_signal( - start_values=start_values, sample_id=sample_id, deriv_noised=False - ) - - if not self.__check_if_signal_has_steady_state(clean_signal): - raise ValueError("Signal has no steady state.") - - pertubation_std = 0.01 - for key in start_values["specieses"]: - start_values["specieses"][key][sample_id] = clean_signal[key].iloc[ - -1 - ] * random_generator.get_rng().normal(1, pertubation_std) + # Try LSODA time integration first + try: + clean_signal = self.sm.get_clean_signal( + start_values=start_values, sample_id=sample_id + ) + + for key in start_values["specieses"]: + start_values["specieses"][key][sample_id] = clean_signal[key].iloc[-1] + + clean_signal = self.sm.get_clean_signal( + start_values=start_values, sample_id=sample_id, deriv_noised=False + ) + + if 
self.__check_if_signal_has_steady_state(clean_signal): + pertubation_std = 0.01 + for key in start_values["specieses"]: + start_values["specieses"][key][sample_id] = clean_signal[key].iloc[ + -1 + ] * random_generator.get_rng().normal(1, pertubation_std) + + pertubated_signal = self.sm.get_clean_signal( + start_values=start_values, sample_id=sample_id + ) + if self.__check_if_signal_has_steady_state(pertubated_signal): + self.sm.apply_noisifier(clean_signal) + return clean_signal.iloc[-1] + except Exception: + pass # Fall through to bounded solver + + # Fallback to bounded solver if LSODA fails + try: + from simba_ml.simulation import steady_state_solvers + + # Get kinetic parameters for this sample + kinetic_params = { + name: param.get_at_timestamp(sample_id, 0.0) + for name, param in self.sm.kinetic_parameters.items() + } + + # Get initial guess from species start values + dynamic_species = [sp for sp, obj in self.sm.specieses.items() if obj.contained_in_output] + initial_guess = [start_values["specieses"][sp_name][sample_id] for sp_name in dynamic_species] + + # Use bounded solver + solution, success, message = steady_state_solvers.find_steady_state( + deriv_func=self.sm.deriv, + initial_guess=initial_guess, + kinetic_params=kinetic_params, + solver_type='bounded' + ) + + if success: + # Create result series from solution + result_dict = {sp_name: solution[i] for i, sp_name in enumerate(dynamic_species)} + return pd.Series(result_dict) + except Exception: + pass - pertubated_signal = self.sm.get_clean_signal( - start_values=start_values, sample_id=sample_id - ) - if self.__check_if_signal_has_steady_state(pertubated_signal): - self.sm.apply_noisifier(clean_signal) - return clean_signal.iloc[-1] raise ValueError("Signal has no steady state.") def generate_signals(self, n: int = 100) -> pd.DataFrame: diff --git a/simba_ml/simulation/system_model/sbml_system_model.py b/simba_ml/simulation/system_model/sbml_system_model.py index 065ae61..af3e5ab 100644 --- 
a/simba_ml/simulation/system_model/sbml_system_model.py +++ b/simba_ml/simulation/system_model/sbml_system_model.py @@ -68,6 +68,17 @@ def __init__( fallback_name = Path(self.sbml_file_path).stem if self.sbml_file_path else 'SBML_Model' model_name = name or self.sbml_data['sbml_info'].get('model_name', fallback_name) + # Check for unsupported SBML features + num_functions = self.sbml_data.get('sbml_info', {}).get('num_functions', 0) + if num_functions > 0: + raise NotImplementedError( + f"SBML model '{model_name}' contains {num_functions} function definition(s), which are not yet " + f"supported by SimbaML. Function definitions are user-defined mathematical functions (like custom " + f"reaction rate laws) that require lambda expression parsing and argument substitution. " + f"Please select a different SBML model without function definitions. " + f"See docs/KNOWN_LIMITATIONS.md for details and implementation roadmap." + ) + # Build species and parameters from parsed data built_species = self._build_species(species_distributions) built_parameters = self._build_kinetic_parameters(parameter_distributions) @@ -163,8 +174,33 @@ def _build_kinetic_parameters( distribution=distribution ) - # Note: Local parameters are NOT stored globally to avoid conflicts - # They are handled individually for each reaction during rate evaluation + # Add local parameters with unique names (reaction_id_param_id) to avoid conflicts + for reaction in self.sbml_data['reactions']: + kinetic_law = reaction.get('kinetic_law') + if kinetic_law and kinetic_law.get('parameters'): + for local_param in kinetic_law['parameters']: + local_param_id = local_param['id'] + local_param_value = local_param.get('value', 1.0) + + # Create unique parameter name: reaction_id__param_id + unique_param_name = f"{reaction['id']}__{local_param_id}" + + if unique_param_name in custom_distributions: + distribution = custom_distributions[unique_param_name] + else: + # Default: Keep local parameters CONSTANT at SBML 
values + # Rationale: Independent random sampling of many kinetic parameters creates + # biologically impossible combinations that don't converge to steady-state. + # For synthetic ML datasets, varying species initial conditions provides + # sufficient diversity while maintaining model stability. + # + # Future enhancement: Implement correlated parameter sampling that maintains + # biological relationships (e.g., scale all V_max together, scale all K_m together). + distribution = distributions.Constant(local_param_value) + + params_dict[unique_param_name] = kinetic_parameters_module.ConstantKineticParameter( + distribution=distribution + ) return params_dict @@ -183,6 +219,21 @@ def _build_exact_kinetic_parameters(self) -> dict[str, kinetic_parameters_module distribution=distribution ) + # Add local parameters with unique names + for reaction in self.sbml_data['reactions']: + kinetic_law = reaction.get('kinetic_law') + if kinetic_law and kinetic_law.get('parameters'): + for local_param in kinetic_law['parameters']: + local_param_id = local_param['id'] + local_param_value = local_param.get('value', 1.0) + + # Create unique parameter name: reaction_id__param_id + unique_param_name = f"{reaction['id']}__{local_param_id}" + distribution = distributions.Constant(local_param_value) + params_dict[unique_param_name] = kinetic_parameters_module.ConstantKineticParameter( + distribution=distribution + ) + return params_dict def _build_exact_species(self) -> dict[str, species.Species]: @@ -274,6 +325,24 @@ def derivative_func(t: float, y: list[float], kinetic_params: dict) -> tuple[flo initial_value = sp_data['initial_amount'] species_conc[sp_id] = initial_value + # Evaluate assignment rules (algebraic equations for derived quantities) + # These must be computed BEFORE reaction rates since reactions may depend on them + for rule in self.sbml_data.get('rules', []): + if rule.get('type') == 'assignment': + variable = rule.get('variable') + formula = rule.get('formula') or 
rule.get('math') + if variable and formula: + try: + value = self._evaluate_rate_formula( + formula, + species_conc, + kinetic_params, + {} + ) + species_conc[variable] = value + except Exception as e: + logger.debug(f"Error evaluating assignment rule for {variable}: {e}") + # Compute derivatives dydt = [0.0] * len(dynamic_species) @@ -294,14 +363,23 @@ def derivative_func(t: float, y: list[float], kinetic_params: dict) -> tuple[flo # Evaluate reaction rate try: - # Prepare local parameters (now using direct names) + # Prepare local parameters + # Local parameters are stored with unique names (reaction_id__param_id) in kinetic_params + # but need to be available by their original names for formula evaluation local_params = {} if kinetic_law.get('parameters'): for local_param in kinetic_law['parameters']: param_name = local_param['id'] - if param_name in kinetic_params: + unique_param_name = f"{reaction['id']}__{param_name}" + + # Check if this local parameter was sampled (exists in kinetic_params) + if unique_param_name in kinetic_params: + local_params[param_name] = kinetic_params[unique_param_name] + elif param_name in kinetic_params: + # Fallback: check if global parameter with same name exists local_params[param_name] = kinetic_params[param_name] else: + # Use default value from SBML local_params[param_name] = local_param.get('value', 1.0) rate = self._evaluate_rate_formula( From 81f5f75177cec6dbad83ab63c7b52986e4bb88ae Mon Sep 17 00:00:00 2001 From: YagmurSimsekk Date: Sat, 8 Nov 2025 21:01:20 +0100 Subject: [PATCH 6/7] checks initialAssignment variables in SBML --- simba_ml/sbml_parser/level_2/parser.py | 22 ++++++- simba_ml/sbml_parser/level_3/parser.py | 24 +++++-- .../generators/steady_state_generator.py | 63 ++++++++++++++++--- 3 files changed, 94 insertions(+), 15 deletions(-) diff --git a/simba_ml/sbml_parser/level_2/parser.py b/simba_ml/sbml_parser/level_2/parser.py index 373c33e..d15bb29 100644 --- a/simba_ml/sbml_parser/level_2/parser.py +++ 
b/simba_ml/sbml_parser/level_2/parser.py @@ -219,12 +219,28 @@ def _parse_initial_assignments(self): if hasattr(self.model, 'getNumInitialAssignments'): for i in range(self.model.getNumInitialAssignments()): assignment = self.model.getInitialAssignment(i) + + # Safely get formula - some SBML files may not have this method + formula = None + if hasattr(assignment, 'isSetFormula') and assignment.isSetFormula(): + formula = assignment.getFormula() + + # Safely get math expression + math_expr = None + if hasattr(assignment, 'isSetMath') and assignment.isSetMath(): + math_expr = formulaToString(assignment.getMath()) + + # Safely get SBO term + sbo_term = None + if hasattr(assignment, 'isSetSBOTerm') and assignment.isSetSBOTerm(): + sbo_term = assignment.getSBOTermID() + assign_data = { 'symbol': assignment.getSymbol(), - 'formula': assignment.getFormula() if assignment.isSetFormula() else None, - 'math': formulaToString(assignment.getMath()) if assignment.isSetMath() else None, + 'formula': formula, + 'math': math_expr, 'notes': self._get_notes(assignment), - 'sbo_term': assignment.getSBOTermID() if assignment.isSetSBOTerm() else None + 'sbo_term': sbo_term } assignments_list.append(assign_data) diff --git a/simba_ml/sbml_parser/level_3/parser.py b/simba_ml/sbml_parser/level_3/parser.py index 52f5a32..6cccc8c 100644 --- a/simba_ml/sbml_parser/level_3/parser.py +++ b/simba_ml/sbml_parser/level_3/parser.py @@ -234,13 +234,29 @@ def _parse_initial_assignments(self): for i in range(self.model.getNumInitialAssignments()): assignment = self.model.getInitialAssignment(i) + + # Safely get math expression + math_expr = None + if hasattr(assignment, 'isSetMath') and assignment.isSetMath(): + math_expr = formulaToString(assignment.getMath()) + + # Safely get SBO term + sbo_term = None + if hasattr(assignment, 'isSetSBOTerm') and assignment.isSetSBOTerm(): + sbo_term = assignment.getSBOTermID() + + # Safely get MetaId + metaid = None + if hasattr(assignment, 'isSetMetaId') and 
assignment.isSetMetaId(): + metaid = assignment.getMetaId() + assign_data = { 'symbol': assignment.getSymbol(), - 'formula': formulaToString(assignment.getMath()) if assignment.isSetMath() else None, - 'math': formulaToString(assignment.getMath()) if assignment.isSetMath() else None, + 'formula': math_expr, + 'math': math_expr, 'notes': self._get_notes(assignment), - 'sbo_term': assignment.getSBOTermID() if assignment.isSetSBOTerm() else None, - 'metaid': assignment.getMetaId() if assignment.isSetMetaId() else None + 'sbo_term': sbo_term, + 'metaid': metaid } assignments_list.append(assign_data) diff --git a/simba_ml/simulation/generators/steady_state_generator.py b/simba_ml/simulation/generators/steady_state_generator.py index d954f45..023f3eb 100644 --- a/simba_ml/simulation/generators/steady_state_generator.py +++ b/simba_ml/simulation/generators/steady_state_generator.py @@ -13,22 +13,53 @@ class SteadyStateGenerator: """Defines how to generate signals from a PredictionTask.""" def __init__(self, sm: system_model_interface.SystemModelInterface): - """Initializes the `PredictionTaskBuilder`. + """Initializes the `SteadyStateGenerator`. Args: sm: A `SystemModel`, for which the signals should be built. + + Raises: + ValueError: If the model cannot generate a valid steady state (e.g., ODE solver failure, + all species are boundary conditions, or malformed SBML). """ self.sm = sm - def _is_similar(self, series1: pd.Series, series2: pd.Series) -> bool: - """Checks if two series are similar. + # Validate that ODE solver works on this model by attempting one test sample + # This catches issues like malformed SBML files, broken derivative functions, etc. + try: + start_vals = sm.sample_start_values_from_hypercube(1) + _ = self.__generate_steady_state(start_vals, 0) + except ValueError as e: + # Re-raise ValueError (likely "Signal has no steady state") + raise ValueError( + f"Model '{sm.name}' failed ODE solver validation during initialization. 
" + f"This typically means:\n" + f"1. SBML file is malformed (check annotation elements)\n" + f"2. All species are marked as boundary conditions (use a different model)\n" + f"3. ODE system doesn't reach steady state with current solver settings\n" + f"\nOriginal error: {e}" + ) from e + except Exception as e: + # Catch any other unexpected errors + raise ValueError( + f"Model '{sm.name}' failed initialization validation: {type(e).__name__}: {e}" + ) from e + + def _is_similar(self, series1: pd.Series, series2: pd.Series, + abs_tol: float = 1e-8, rel_tol: float = 1e-4) -> bool: + """Checks if two series are similar using combined absolute and relative tolerance. + + Uses a robust tolerance check that works for both small values (near zero) + and large values: |a - b| <= abs_tol OR |a - b| / max(|a|, |b|) <= rel_tol Args: series1: The first series. series2: The second series. + abs_tol: Absolute tolerance threshold (default 1e-8, for near-zero values) + rel_tol: Relative tolerance threshold (default 1e-4, for proportional differences) Returns: - True if the series are similar, False otherwise. + True if all values are similar within tolerance, False otherwise. Raises: ValueError: if the series have different lengths. 
@@ -36,10 +67,26 @@ def _is_similar(self, series1: pd.Series, series2: pd.Series) -> bool: if len(series1) != len(series2): raise ValueError("Series have different lengths.") - return all( - math.isclose(series1.iloc[i], series2.iloc[i], rel_tol=1e-05) - for i in range(len(series1)) - ) + for i in range(len(series1)): + val1 = series1.iloc[i] + val2 = series2.iloc[i] + + # Check absolute tolerance first (good for values near zero) + abs_diff = abs(val1 - val2) + if abs_diff <= abs_tol: + continue + + # Check relative tolerance (good for large values) + max_abs = max(abs(val1), abs(val2)) + if max_abs > 0: + rel_diff = abs_diff / max_abs + if rel_diff <= rel_tol: + continue + + # Neither tolerance satisfied + return False + + return True def __check_if_signal_has_steady_state(self, signal: pd.DataFrame) -> bool: """Checks if a signal has a steady state. From 025f869c89d2ab13601ecd95f2d6a4a680278f36 Mon Sep 17 00:00:00 2001 From: YagmurSimsekk Date: Sun, 15 Mar 2026 16:29:36 +0100 Subject: [PATCH 7/7] adds modern CLI for SBML parsing and BioModels Integration --- docs/source/Usage/cli.rst | 144 +++++++++++- simba_ml/cli/biomodels.py | 33 ++- simba_ml/cli/commands/__init__.py | 1 + simba_ml/cli/commands/biomodels.py | 242 ++++++++++++++++++++ simba_ml/cli/commands/sbml.py | 307 +++++++++++++++++++++++++ simba_ml/cli/legacy_adapters.py | 48 ++++ simba_ml/cli/main.py | 62 +++++ simba_ml/cli/parse_sbml.py | 6 +- simba_ml/cli/utils.py | 23 ++ tests/sbml_parser/test_main_parser.py | 315 -------------------------- 10 files changed, 840 insertions(+), 341 deletions(-) create mode 100644 simba_ml/cli/commands/__init__.py create mode 100644 simba_ml/cli/commands/biomodels.py create mode 100644 simba_ml/cli/commands/sbml.py create mode 100644 simba_ml/cli/legacy_adapters.py create mode 100644 simba_ml/cli/main.py create mode 100644 simba_ml/cli/utils.py delete mode 100644 tests/sbml_parser/test_main_parser.py diff --git a/docs/source/Usage/cli.rst b/docs/source/Usage/cli.rst 
index afcef5b..5f59d6f 100644 --- a/docs/source/Usage/cli.rst +++ b/docs/source/Usage/cli.rst @@ -1,12 +1,146 @@ Using SimbaML CLI ================== -SimbaML provides a CLI with multiple commands. +SimbaML provides a modern CLI for SBML parsing, BioModels integration, and data generation. -To get a list of all available commands, run: +Installation +------------ - $ simba_ml --help +After installing SimbaML, the ``simba-ml`` command will be available: -To get help on a specific command, run: + $ simba-ml --help - $ simba_ml <command> --help +For detailed help on any command, use: + + $ simba-ml <command> --help + +SBML Parsing +------------ + +Parse and analyze SBML model files locally. + +Basic Usage +^^^^^^^^^^^ + + $ simba-ml sbml parse <file> + +This command will: +- Detect SBML Level and Version +- Parse the model structure (species, reactions, parameters, compartments) +- Analyze species types (dynamic vs boundary conditions) +- Display ODE readiness assessment +- Show sample species and reactions +- Display model description + +The parser validates: +- SBML file format and compliance +- Presence of kinetic laws for ODE simulation +- Model connectivity and network structure + +Options +^^^^^^^ + +- ``--verbose, -v``: Show detailed parsing information +- ``--species-limit, -s INTEGER``: Number of species to display (default: 5) +- ``--reactions-limit, -r INTEGER``: Number of reactions to display (default: 5) +- ``--export {csv}``: Export model data to CSV format (the only format currently supported) +- ``--output-dir, -o PATH``: Output directory for exports (default: ./sbml_exports) +- ``--quiet, -q``: Suppress visual output (JSON output only) + +Examples +^^^^^^^^ + +Parse a local SBML file: + + $ simba-ml sbml parse Garde2020.xml + +Parse with verbose output and custom display limits: + + $ simba-ml sbml parse model.xml --verbose --species-limit 10 --reactions-limit 10 + +Export model data to CSV format: + + $ simba-ml sbml parse model.xml --export csv --output-dir ./exported_data + +Get JSON output
(quiet mode, useful for scripts): + + $ simba-ml sbml parse model.xml --quiet + +BioModels Integration +--------------------- + +Search and download SBML models from the `BioModels Database <https://www.ebi.ac.uk/biomodels/>`_. + +Search for Models +^^^^^^^^^^^^^^^^^ + + $ simba-ml biomodels search <query> [--limit <n>] + +The search command queries the BioModels REST API and displays: +- Model ID (e.g., BIOMD0000000505) +- Model name +- Format (SBML) + +Search examples: + + # Search for SIR models (limit 3 results) + $ simba-ml biomodels search "SIR" --limit 3 + + # Search for oscillation models (default 10 results) + $ simba-ml biomodels search "oscillation" + + # Search for cancer models + $ simba-ml biomodels search "cancer" + +Download Models +^^^^^^^^^^^^^^^ + + $ simba-ml biomodels download <model_id> [--output-dir <dir>] + +Downloads a specific BioModels model and saves it as an SBML XML file. + +Options: + +- ``--output-dir, -o PATH``: Directory to save the model (default: ./biomodels) + +Download examples: + + # Download a specific model + $ simba-ml biomodels download BIOMD0000000505 + + # Download to a custom directory + $ simba-ml biomodels download BIOMD0000000505 --output-dir ./my_models + +Complete Workflow +----------------- + +Here's a typical workflow for finding and analyzing a model: + +1. **Search for models of interest:** + + $ simba-ml biomodels search "SIR" + +2. **Download a model:** + + $ simba-ml biomodels download BIOMD0000000982 + +3. **Parse and analyze the downloaded model:** + + $ simba-ml sbml parse BIOMD0000000982_url.xml + +4. **Export data for machine learning:** + + $ simba-ml sbml parse BIOMD0000000982_url.xml --export csv --output-dir ./sir_data + +5.
**Get JSON output for programmatic use:** + + $ simba-ml sbml parse BIOMD0000000982_url.xml --quiet + +Legacy CLI +---------- + +For backward compatibility, the legacy CLI interface is still available: + + $ python -m simba_ml.cli + +The modern CLI (``simba-ml``) is recommended for new workflows as it provides better formatting and improved user experience. diff --git a/simba_ml/cli/biomodels.py b/simba_ml/cli/biomodels.py index e653a67..89a1171 100644 --- a/simba_ml/cli/biomodels.py +++ b/simba_ml/cli/biomodels.py @@ -1,5 +1,5 @@ import click -from simba_ml.sbml_parser.biomodels_api import BioModelsAPI, download_biomodel, search_biomodels +from simba_ml.sbml_parser.biomodels_api import BioModelsAPI @click.group() @@ -14,7 +14,8 @@ def biomodels(): def download(model_id, output_dir): """Download an SBML model from BioModels Database.""" try: - file_path = download_biomodel(model_id, output_dir) + api = BioModelsAPI() + file_path = api.download_model(model_id, output_dir) click.echo(click.style(f"✅ Downloaded: {file_path}", fg='green')) except Exception as e: click.echo(click.style(f"❌ Error: {e}", fg='red'), err=True) @@ -28,34 +29,30 @@ def download(model_id, output_dir): def search(query, limit, detailed): """Search for models in BioModels Database.""" try: - models = search_biomodels(query, limit) - + api = BioModelsAPI() + models = api.search_models(query, limit) + if not models: click.echo(f"No models found for query: {query}") return - + click.echo(click.style(f"Found {len(models)} models for '{query}':", fg='cyan', bold=True)) click.echo() - + for i, model in enumerate(models, 1): - model_id = model.get('id', 'unknown') + model_id = model.get('model_id', 'unknown') name = model.get('name', 'No name available') - + click.echo(f"{i}. 
@biomodels.command()
@click.argument("query")
@click.option("--limit", "-l", default=10, help="Maximum number of results (default: 10)")
@click.option("--format", "-f", type=click.Choice(['table', 'json']), default='table',
              help="Output format (default: table)")
def search(query, limit, format):
    """Search BioModels Database for models.

    Args:
        query: Search term passed to ``BioModelsAPI.search_models``.
        limit: Maximum number of results to request.
        format: ``'table'`` for a human-readable listing, ``'json'`` for
            machine-readable output.

    Raises:
        click.Abort: If the search raises any exception.
    """
    # NOTE: ``format`` shadows the builtin, but it is the keyword click derives
    # from ``--format``; the name is kept so the callback signature is unchanged.
    as_json = format == 'json'
    try:
        api = BioModelsAPI()

        if as_json:
            # Status goes to stderr: stdout must carry nothing but the JSON
            # document, otherwise consumers cannot parse it.
            click.echo("Searching BioModels...", err=True)
        else:
            click.echo(click.style(f"🔍 Searching BioModels for: '{query}'", fg='blue', bold=True))

        results = api.search_models(query, limit=limit)

        if not results:
            if as_json:
                # Keep stdout parseable even when nothing matched.
                click.echo("No models found matching your query.", err=True)
                click.echo("[]")
            else:
                click.echo("No models found matching your query.")
            return

        if as_json:
            click.echo(json.dumps(results, indent=2))
        else:
            _display_search_results(results)

    except Exception as e:
        click.echo(click.style(f"Search failed: {e}", fg='red'), err=True)
        raise click.Abort()
@biomodels.command()
@click.argument("query")
@click.argument("output_dir", type=click.Path(path_type=Path))
@click.option("--limit", "-l", default=10, help="Maximum models to download (default: 10)")
@click.option("--parse", "-p", is_flag=True, help="Parse models after downloading")
def batch_download(query, output_dir, limit, parse):
    """Search and download multiple models matching a query.

    Each search hit is downloaded independently; a failure for one model is
    recorded and reported in the summary without stopping the batch.

    Args:
        query: Search term passed to ``BioModelsAPI.search_models``.
        output_dir: Directory handed to ``BioModelsAPI.download_model``.
        limit: Maximum number of search results to download.
        parse: When set, parse every downloaded file and print a summary table.

    Raises:
        click.Abort: If the search or the batch as a whole fails.
    """
    try:
        api = BioModelsAPI()

        click.echo(f"🔍 Searching for models: '{query}'")
        results = api.search_models(query, limit=limit)

        if not results:
            click.echo("No models found.")
            return

        click.echo(f"📥 Downloading {len(results)} models to {output_dir}")

        successful_downloads = []
        failed_downloads = []

        with click.progressbar(results, label='Downloading models') as bar:
            for model in bar:
                # Resolve the ID before the try block. Previously it was bound
                # inside the try, so a hit without 'model_id' made the except
                # handler use an unbound (or stale) name and abort the batch.
                model_id = model.get('model_id')
                if not model_id:
                    failed_downloads.append(('unknown', "search result has no 'model_id'"))
                    continue
                try:
                    local_file = api.download_model(model_id, str(output_dir))
                except Exception as e:
                    failed_downloads.append((model_id, str(e)))
                else:
                    successful_downloads.append((model_id, local_file))

        # Summary
        click.echo(f"\n✅ Downloaded: {len(successful_downloads)} models")
        if failed_downloads:
            click.echo(f"❌ Failed: {len(failed_downloads)} models")
            for model_id, error in failed_downloads:
                click.echo(f" • {model_id}: {error}")

        # Parse if requested
        if parse and successful_downloads:
            click.echo(f"\n🔍 Parsing downloaded models...")

            from simba_ml.sbml_parser.main_parser import MainSBMLParser

            parsed_summary = []
            for model_id, local_file in successful_downloads:
                try:
                    parser = MainSBMLParser(local_file)
                    result = parser.process()
                    info = result['sbml_info']
                    parsed_summary.append({
                        'model_id': model_id,
                        'species': info['num_species'],
                        'reactions': info['num_reactions'],
                        'ode_ready': is_ode_ready(result)
                    })
                except Exception:
                    # Best effort: a model that cannot be parsed still gets a
                    # row in the table so the user sees which one failed.
                    parsed_summary.append({
                        'model_id': model_id,
                        'species': 'Error',
                        'reactions': 'Error',
                        'ode_ready': False
                    })

            # Display parsing summary
            click.echo(f"\n📊 Parsing Summary:")
            click.echo(f"{'Model ID':<20} {'Species':<8} {'Reactions':<10} {'ODE Ready'}")
            click.echo("-" * 50)
            for summary in parsed_summary:
                ode_status = "✅" if summary['ode_ready'] else "❌"
                click.echo(f"{summary['model_id']:<20} {summary['species']:<8} {summary['reactions']:<10} {ode_status}")

    except Exception as e:
        click.echo(click.style(f"Batch download failed: {e}", fg='red'), err=True)
        raise click.Abort()
def _display_model_info(model_info):
    """Display detailed information about a BioModels entry.

    Args:
        model_info: Mapping returned by ``BioModelsAPI.get_model_info``. Only
            the keys read below are used; missing or empty ones are skipped.
    """
    click.echo(f"\n📋 BioModels Entry Information")
    click.echo("=" * 50)

    # Basic info
    click.echo(f"Model ID: {model_info.get('model_id', 'Unknown')}")
    click.echo(f"Name: {model_info.get('name', 'Unknown')}")

    description = model_info.get('description')
    if description:
        # Add the ellipsis only when text was actually cut off; previously it
        # was appended to short descriptions as well.
        preview = description[:200] + "..." if len(description) > 200 else description
        click.echo(f"Description: {preview}")

    if model_info.get('publication'):
        pub = model_info['publication']
        click.echo(f"\n📚 Publication:")
        if pub.get('title'):
            click.echo(f" Title: {pub['title']}")
        if pub.get('authors'):
            click.echo(f" Authors: {pub['authors']}")
        if pub.get('journal'):
            click.echo(f" Journal: {pub['journal']}")

    if model_info.get('curation_status'):
        click.echo(f"\n✅ Curation: {model_info['curation_status']}")

    if model_info.get('submitter'):
        click.echo(f"👤 Submitter: {model_info['submitter']}")

    if model_info.get('submission_date'):
        click.echo(f"📅 Submitted: {model_info['submission_date']}")

    if model_info.get('file_size'):
        click.echo(f"📁 Size: {model_info['file_size']}")

    click.echo(f"\n🔗 BioModels URL: https://www.ebi.ac.uk/biomodels/{model_info.get('model_id', '')}")
@sbml.command()
@click.argument("file", type=click.Path(exists=True, path_type=Path))
@click.option("--verbose", "-v", is_flag=True, help="Show detailed parsing information")
@click.option("--species-limit", "-s", default=5, help="Number of species to display (default: 5)")
@click.option("--reactions-limit", "-r", default=5, help="Number of reactions to display (default: 5)")
@click.option("--export", "-e", type=click.Choice(['csv']), help="Export data in CSV format (only format supported)")
@click.option("--output-dir", "-o", type=click.Path(path_type=Path), default=Path("./sbml_exports"),
              help="Output directory for exports (default: ./sbml_exports)")
@click.option("--quiet", "-q", is_flag=True, help="Suppress visual output")
def parse(file, verbose, species_limit, reactions_limit, export, output_dir, quiet):
    """Parse an SBML file and display model information.

    In normal mode a formatted report is printed. In quiet mode a JSON summary
    is printed to stdout instead. When ``--export`` is given the export runs in
    both modes; in quiet mode its status goes to stderr so stdout stays pure
    JSON.

    Args:
        file: Path of the SBML file to parse.
        verbose: Passed to the report to show a longer description.
        species_limit: Number of species listed in the report.
        reactions_limit: Number of reactions listed in the report.
        export: Export format, or None to skip exporting.
        output_dir: Directory that receives exported files.
        quiet: Replace the formatted report with a JSON summary.

    Raises:
        click.Abort: If the file cannot be parsed.
    """
    try:
        sbml_parser = MainSBMLParser(str(file))
        result = sbml_parser.process()

        if quiet:
            # JSON output for programmatic use
            info = result['sbml_info']
            summary = {
                'model_id': info['model_id'],
                'model_name': info['model_name'],
                'sbml_level': info['level'],
                'sbml_version': info['version'],
                'num_species': info['num_species'],
                'num_reactions': info['num_reactions'],
                'num_parameters': info['num_parameters'],
                'num_compartments': info['num_compartments'],
                'ode_ready': is_ode_ready(result)
            }
            click.echo(json.dumps(summary, indent=2))
        else:
            _display_model_info(result, file, species_limit, reactions_limit, verbose)

        # Previously quiet mode returned before this point, so
        # `--quiet --export csv` silently skipped the requested export.
        if not export:
            return

        if not quiet:
            click.echo(f"\n🔬 Exporting data...")

        try:
            exporter = SBMLExporter(result)
            exported_files = exporter.export_to_files(str(output_dir), format=export)

            if quiet:
                # Quiet mode: report only the export location, on stderr.
                main_file = list(exported_files.values())[0] if exported_files else str(output_dir)
                click.echo(f"Exported to: {main_file}", err=True)
            else:
                click.echo(click.style(f"📁 Data exported to: {output_dir}", fg='green'))
                for data_type, file_path in exported_files.items():
                    click.echo(f" • {data_type}: {Path(file_path).name}")

                # Show ML statistics (dataset summary)
                ml_dataset = exporter.get_ml_dataset()
                click.echo(f"\n📊 Export Summary:")
                if 'matrices' in ml_dataset:
                    S = ml_dataset['matrices']['stoichiometry']
                    A = ml_dataset['matrices']['adjacency']
                    click.echo(f" • Stoichiometry matrix: {S.shape}")
                    click.echo(f" • Adjacency matrix: {A.shape}")
                    cells = A.shape[0] * A.shape[1]
                    if cells:
                        # Density is undefined for an empty matrix; skipping it
                        # avoids a division by zero after a successful export.
                        click.echo(f" • Network density: {(A.sum() / cells * 100):.1f}%")

        except Exception as e:
            # Export is best effort: a failure is reported but does not undo
            # or abort the parse that already succeeded.
            if not quiet:
                click.echo(click.style(f"Export failed: {e}", fg='red'), err=True)
            else:
                click.echo(f"Export error: {e}", err=True)

    except (SBMLParsingError, UnsupportedSBMLVersionError) as e:
        click.echo(click.style(f"Error: {e}", fg='red'), err=True)
        raise click.Abort()
    except Exception as e:
        click.echo(click.style(f"Unexpected error: {e}", fg='red'), err=True)
        raise click.Abort()
click.echo(click.style("✅ Export completed!", fg='green', bold=True)) + click.echo(f"📁 Output directory: {output_dir}") + + for data_type, file_path in exported_files.items(): + click.echo(f" • {data_type}: {Path(file_path).name}") + + if verbose: + # Show dataset statistics + ml_dataset = exporter.get_ml_dataset() + click.echo("\n📊 Dataset Summary:") + if 'matrices' in ml_dataset: + S = ml_dataset['matrices']['stoichiometry'] + A = ml_dataset['matrices']['adjacency'] + click.echo(f" • Stoichiometry matrix: {S.shape}") + click.echo(f" • Adjacency matrix: {A.shape}") + click.echo(f" • Network density: {(A.sum() / (A.shape[0] * A.shape[1]) * 100):.1f}%") + + except Exception as e: + click.echo(click.style(f"Export failed: {e}", fg='red'), err=True) + raise click.Abort() + + + + +@sbml.command() +@click.argument("file", type=click.Path(exists=True, path_type=Path)) +def validate(file): + """Validate SBML file and check for common issues.""" + try: + sbml_parser = MainSBMLParser(str(file)) + result = sbml_parser.process() + + click.echo(click.style("🔍 SBML Validation Results", fg='blue', bold=True)) + click.echo("=" * 50) + + # Basic validation + click.echo("✅ File parsed successfully") + click.echo("✅ SBML structure is valid") + + # Check for common issues + issues = [] + warnings = [] + + # Check for missing initial values + species_without_initial = [ + sp for sp in result['species'] + if sp.get('initial_concentration') is None and sp.get('initial_amount') is None + ] + if species_without_initial: + warnings.append(f"{len(species_without_initial)} species missing initial values") + + # Check for reactions without kinetic laws + reactions_without_kinetics = [ + rxn for rxn in result['reactions'] + if not rxn.get('kinetic_law') + ] + if reactions_without_kinetics: + warnings.append(f"{len(reactions_without_kinetics)} reactions missing kinetic laws") + + # Check for empty reactions + empty_reactions = [ + rxn for rxn in result['reactions'] + if not rxn.get('reactants') 
def _display_model_info(result, file, species_limit, reactions_limit, verbose):
    """Print the formatted report for a parsed SBML model.

    Args:
        result: Parsed model dictionary; reads ``sbml_info``, ``species``,
            ``reactions`` and, optionally, ``rules``.
        file: Path of the parsed file; only its name is shown.
        species_limit: Maximum number of species to list.
        reactions_limit: Maximum number of reactions to list.
        verbose: Show a 300-character description preview instead of 200.
    """
    info = result['sbml_info']
    all_species = result['species']
    all_reactions = result['reactions']

    def _label(text):
        # Blue bold prefix used for the basic-info lines.
        return click.style(text, fg='blue', bold=True)

    # Header
    banner = click.style("=" * 60, fg='green')
    click.echo(banner)
    click.echo(click.style(f"SBML Model Analysis", fg='green', bold=True))
    click.echo(banner)
    click.echo()

    # Basic info
    click.echo(_label(f"📄 File:") + f" {file.name}")
    click.echo(_label(f"📋 Model:") + f" {info['model_name']}")
    click.echo(_label(f"🆔 ID:") + f" {info['model_id']}")
    click.echo(_label(f"🔢 SBML:") + f" Level {info['level']}, Version {info['version']}")
    click.echo()

    # Statistics
    click.echo(click.style("📊 Model Statistics:", fg='cyan', bold=True))
    click.echo(f" • Species: {info['num_species']}")
    click.echo(f" • Reactions: {info['num_reactions']}")
    click.echo(f" • Parameters: {info['num_parameters']}")
    click.echo(f" • Compartments: {info['num_compartments']}")

    # Boundary species analysis
    boundary_count = sum(1 for entry in all_species if entry.get('boundary_condition', False))
    dynamic_count = info['num_species'] - boundary_count

    click.echo(f"\n🧬 Species Analysis:")
    click.echo(f" • Dynamic species: {dynamic_count}")
    click.echo(f" • Boundary species: {boundary_count}")
    click.echo()

    # ODE readiness
    if is_ode_ready(result):
        # Classify the model by where its dynamics come from.
        has_kinetic_laws = any(rxn.get('kinetic_law') for rxn in all_reactions)
        has_rate_rules = any(rule.get('type') == 'rate' for rule in result.get('rules', []))

        if has_rate_rules:
            model_type = " (reaction + rule-based)" if has_kinetic_laws else " (rule-based)"
        else:
            model_type = " (reaction-based)"

        click.echo(click.style("✅ ODE Ready:", fg='green', bold=True) + f" Suitable for simulation{model_type}")
    else:
        click.echo(click.style("⚠️ Warning:", fg='yellow', bold=True) + " No kinetic laws or rate rules found")
    click.echo()

    # Sample species
    if all_species:
        shown = min(species_limit, len(all_species))
        click.echo(click.style(f"🧬 Sample Species (showing {shown}):", fg='magenta', bold=True))
        for position, species in enumerate(all_species[:species_limit], start=1):
            boundary = " (boundary)" if species.get('boundary_condition') else ""
            if species.get('initial_concentration') is not None:
                initial = f" [C₀={species['initial_concentration']}]"
            elif species.get('initial_amount') is not None:
                initial = f" [A₀={species['initial_amount']}]"
            else:
                initial = ""
            click.echo(f" {position:2d}. {species['id']} in {species['compartment']}{boundary}{initial}")

        if len(all_species) > species_limit:
            remaining = len(all_species) - species_limit
            click.echo(f" ... and {remaining} more")
        click.echo()

    # Sample reactions
    if all_reactions:
        shown = min(reactions_limit, len(all_reactions))
        click.echo(click.style(f"⚗️ Sample Reactions (showing {shown}):", fg='red', bold=True))
        for position, reaction in enumerate(all_reactions[:reactions_limit], start=1):
            reactants = " + ".join(ref['species'] for ref in reaction.get('reactants', []))
            products = " + ".join(ref['species'] for ref in reaction.get('products', []))
            arrow = " ⇌ " if reaction.get('reversible', False) else " → "
            kinetic = " (kinetic ✓)" if reaction.get('kinetic_law') else " (kinetic ✗)"
            click.echo(f" {position:2d}. {reaction['id']}: {reactants}{arrow}{products}{kinetic}")

        if len(all_reactions) > reactions_limit:
            remaining = len(all_reactions) - reactions_limit
            click.echo(f" ... and {remaining} more")
        click.echo()

    # Description: always shown when present; verbose mode only widens the preview.
    notes = info.get('notes')
    if notes:
        preview_length = 300 if verbose else 200
        click.echo(click.style("📝 Description:", fg='white', bold=True))
        notes_preview = notes[:preview_length] + "..." if len(notes) > preview_length else notes
        click.echo(f" {notes_preview}")
        click.echo()
@click.group()
@click.version_option()
@click.pass_context
def simba_ml(ctx):
    """
    SimbaML - Machine Learning for Systems Biology

    A framework for integrating biological models with machine learning workflows.
    Parse SBML models and fetch models from the BioModels Database.

    \b
    Examples:
      # Parse an SBML model
      simba-ml sbml parse model.xml

    \b
      # Download from BioModels
      simba-ml biomodels download BIOMD0000000505
    """
    # The help text lists only the command groups registered on this group
    # (sbml, biomodels). The earlier steady-state / generate examples pointed
    # at commands that are still TODO and would fail with "No such command".
    # The ``\b`` markers stop click from re-wrapping the example block.

    # Give subcommands a dict on ctx.obj to share state through.
    ctx.ensure_object(dict)
a/simba_ml/cli/utils.py b/simba_ml/cli/utils.py
new file mode 100644
index 0000000..1577336
--- /dev/null
+++ b/simba_ml/cli/utils.py
@@ -0,0 +1,28 @@
+"""Shared CLI utilities for SimbaML command-line interface."""
+
+
+def is_ode_ready(sbml_data):
+    """Check if SBML model is ready for ODE simulation.
+
+    A model counts as ODE-ready when at least one reaction has a truthy
+    ``kinetic_law`` entry or at least one rule has ``type`` equal to ``'rate'``.
+
+    Args:
+        sbml_data: Parsed SBML model data dictionary containing reactions and
+            rules. Missing keys and ``None`` values are treated as empty.
+
+    Returns:
+        bool: True if model has kinetic laws or rate rules for ODE simulation.
+    """
+    if not sbml_data:
+        return False
+
+    # dict.get's default only covers missing keys; `or ()` also covers keys
+    # that are present but set to None.
+    reactions = sbml_data.get('reactions') or ()
+    if any(reaction.get('kinetic_law') for reaction in reactions):
+        return True
+
+    # Rate rules specify the ODE directly and carry a 'type' field of 'rate'.
+    rules = sbml_data.get('rules') or ()
+    return any(rule.get('type') == 'rate' for rule in rules)
diff --git a/tests/sbml_parser/test_main_parser.py b/tests/sbml_parser/test_main_parser.py
deleted file mode 100644
index adabc83..0000000
--- a/tests/sbml_parser/test_main_parser.py
+++ /dev/null
@@ -1,315 +0,0 @@
-"""
-Tests for the main SBML parser functionality.
-""" -import pytest -import tempfile -import os -from simba_ml.sbml_parser.main_parser import MainSBMLParser, SBMLParsingError, UnsupportedSBMLVersionError - - -class TestMainSBMLParser: - """Test the main SBML parser functionality.""" - - def test_init(self): - """Test parser initialization.""" - parser = MainSBMLParser("test_file.xml") - assert parser.file_path == "test_file.xml" - assert parser.level is None - assert parser.version is None - assert parser.model is None - - def test_supported_versions(self): - """Test that supported versions are correctly defined.""" - expected_versions = { - (2, 4): "level_2.parser", - (2, 5): "level_2.parser", - (3, 1): "level_3.parser", - (3, 2): "level_3.parser" - } - assert MainSBMLParser.SUPPORTED_VERSIONS == expected_versions - - def test_get_parser_module_supported(self): - """Test getting parser module for supported versions.""" - parser = MainSBMLParser("test.xml") - - # Test Level 2 versions - assert parser.get_parser_module(2, 4) == "level_2.parser" - assert parser.get_parser_module(2, 5) == "level_2.parser" - - # Test Level 3 versions - assert parser.get_parser_module(3, 1) == "level_3.parser" - assert parser.get_parser_module(3, 2) == "level_3.parser" - - def test_get_parser_module_unsupported(self): - """Test error for unsupported versions.""" - parser = MainSBMLParser("test.xml") - - with pytest.raises(UnsupportedSBMLVersionError) as excinfo: - parser.get_parser_module(1, 2) - - assert "Level 1 Version 2 is not supported" in str(excinfo.value) - assert "Supported versions:" in str(excinfo.value) - - def test_detect_version_and_level_invalid_file(self): - """Test error handling for invalid file.""" - parser = MainSBMLParser("nonexistent_file.xml") - - with pytest.raises(SBMLParsingError): - parser.detect_version_and_level() - - def test_validate_ode_model_no_reactions(self): - """Test validation warning for models without reactions.""" - # Create a minimal SBML model without reactions for testing - sbml_content = 
''' - - - - - - - - - -''' - - with tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False) as f: - f.write(sbml_content) - f.flush() - - try: - parser = MainSBMLParser(f.name) - level, version, model = parser.detect_version_and_level() - - # Should not raise error, just log warning - parser.validate_ode_model(model) - - assert level == 3 - assert version == 1 - assert model is not None - - finally: - os.unlink(f.name) - - def test_detect_version_level_with_valid_sbml(self): - """Test detection with valid SBML content.""" - sbml_content = ''' - - - - - - - - - - - - - - - - - - - - - - k - A - - - - - - - - - -''' - - with tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False) as f: - f.write(sbml_content) - f.flush() - - try: - parser = MainSBMLParser(f.name) - level, version, model = parser.detect_version_and_level() - - assert level == 3 - assert version == 2 - assert model is not None - assert parser.level == 3 - assert parser.version == 2 - assert parser.model is not None - - finally: - os.unlink(f.name) - - -class TestSBMLParsingIntegration: - """Integration tests for SBML parsing.""" - - def create_test_sbml_file(self, level, version, content_additions=""): - """Helper to create test SBML files.""" - base_content = f''' - - - - - - - - - - - - - - - - - - k - A - - - {'' if level == 3 else ''} - - - - {content_additions} - -''' - - temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False) - temp_file.write(base_content) - temp_file.flush() - temp_file.close() - return temp_file.name - - def test_level_2_version_4_parsing(self): - """Test parsing Level 2 Version 4 files.""" - test_file = self.create_test_sbml_file(2, 4) - - try: - parser = MainSBMLParser(test_file) - result = parser.process() - - assert isinstance(result, dict) - assert 'sbml_info' in result - assert result['sbml_info']['level'] == 2 - assert result['sbml_info']['version'] == 4 - - finally: - os.unlink(test_file) - - def 
test_level_2_version_5_parsing(self): - """Test parsing Level 2 Version 5 files.""" - test_file = self.create_test_sbml_file(2, 5) - - try: - parser = MainSBMLParser(test_file) - result = parser.process() - - assert isinstance(result, dict) - assert 'sbml_info' in result - assert result['sbml_info']['level'] == 2 - assert result['sbml_info']['version'] == 5 - - finally: - os.unlink(test_file) - - def test_level_3_version_1_parsing(self): - """Test parsing Level 3 Version 1 files.""" - test_file = self.create_test_sbml_file(3, 1) - - try: - parser = MainSBMLParser(test_file) - result = parser.process() - - assert isinstance(result, dict) - assert 'sbml_info' in result - assert result['sbml_info']['level'] == 3 - assert result['sbml_info']['version'] == 1 - - finally: - os.unlink(test_file) - - def test_level_3_version_2_parsing(self): - """Test parsing Level 3 Version 2 files.""" - test_file = self.create_test_sbml_file(3, 2) - - try: - parser = MainSBMLParser(test_file) - result = parser.process() - - assert isinstance(result, dict) - assert 'sbml_info' in result - assert result['sbml_info']['level'] == 3 - assert result['sbml_info']['version'] == 2 - - finally: - os.unlink(test_file) - - def test_unsupported_version_raises_error(self): - """Test that unsupported versions raise appropriate errors.""" - # Create a Level 1 SBML file (unsupported) - sbml_content = ''' - - - - - - -''' - - with tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False) as f: - f.write(sbml_content) - f.flush() - - try: - parser = MainSBMLParser(f.name) - - with pytest.raises(UnsupportedSBMLVersionError) as excinfo: - parser.process() - - assert "Level 1 Version 2 is not supported" in str(excinfo.value) - - finally: - os.unlink(f.name) - - def test_malformed_sbml_raises_error(self): - """Test that malformed SBML raises parsing error.""" - malformed_content = ''' - - - - - - -''' - - with tempfile.NamedTemporaryFile(mode='w', suffix='.xml', delete=False) as f: - 
f.write(malformed_content) - f.flush() - - try: - parser = MainSBMLParser(f.name) - # Should not raise error during parsing, but might log warnings - result = parser.process() - assert isinstance(result, dict) - - finally: - os.unlink(f.name) - - def teardown_method(self): - """Clean up any temporary files.""" - pass - - -if __name__ == "__main__": - pytest.main([__file__])