From 1570d8d39d8ef6fc31125e7ee14c8ae6a159c9ec Mon Sep 17 00:00:00 2001 From: Thomas Barbazuk Date: Sun, 3 Aug 2025 01:54:46 +0200 Subject: [PATCH 1/4] cleaned up before running on more pathways --- src/logic_network_generator.py | 72 ++++++++++++++-------------- src/pathway_generator.py | 2 +- src/reaction_generator.py | 86 +++++++++++++++++++++++++++++----- 3 files changed, 114 insertions(+), 46 deletions(-) diff --git a/src/logic_network_generator.py b/src/logic_network_generator.py index 3ae850c..8440046 100755 --- a/src/logic_network_generator.py +++ b/src/logic_network_generator.py @@ -10,7 +10,7 @@ graph: Graph = Graph(uri, auth=("neo4j", "test")) -def _get_reactome_id_from_hash(decomposed_uid_mapping: pd.DataFrame, hash_value: str) -> int: +def get_reactome_id_from_hash(decomposed_uid_mapping: pd.DataFrame, hash_value: str) -> int: """Extract reactome_id for a given hash from decomposed_uid_mapping.""" return decomposed_uid_mapping.loc[ decomposed_uid_mapping["uid"] == hash_value, "reactome_id" @@ -35,14 +35,14 @@ def create_reaction_id_map( rows = [] for _, match in best_matches.iterrows(): - incomming_hash = match["incomming"] + incoming_hash = match["incoming"] outgoing_hash = match["outgoing"] - reactome_id = _get_reactome_id_from_hash(decomposed_uid_mapping, incomming_hash) + reactome_id = get_reactome_id_from_hash(decomposed_uid_mapping, incoming_hash) row = { "uid": str(uuid.uuid4()), "reactome_id": int(reactome_id), - "input_hash": incomming_hash, + "input_hash": incoming_hash, "output_hash": outgoing_hash, } print("row") @@ -55,39 +55,43 @@ def create_reaction_id_map( def create_uid_reaction_connections( - reaction_id_map: pd.DataFrame, - best_matches: pd.DataFrame, + reaction_id_map: pd.DataFrame, + best_matches: pd.DataFrame, decomposed_uid_mapping: pd.DataFrame ) -> pd.DataFrame: """Create connections between reaction UIDs based on best matches.""" - + reactome_id_to_uid_mapping = dict( zip(reaction_id_map["reactome_id"], reaction_id_map["uid"]) 
) - + uid_reaction_connections_data = [] - + for _, match in best_matches.iterrows(): - incomming_hash = match["incomming"] + incoming_hash = match["incoming"] outgoing_hash = match["outgoing"] - + # Get reactome IDs for both hashes - preceding_reaction_id = _get_reactome_id_from_hash(decomposed_uid_mapping, incomming_hash) - following_reaction_id = _get_reactome_id_from_hash(decomposed_uid_mapping, outgoing_hash) - + preceding_reaction_id = get_reactome_id_from_hash(decomposed_uid_mapping, incoming_hash) + following_reaction_id = get_reactome_id_from_hash(decomposed_uid_mapping, outgoing_hash) + # Get corresponding UIDs preceding_uid = reactome_id_to_uid_mapping.get(preceding_reaction_id) following_uid = reactome_id_to_uid_mapping.get(following_reaction_id) - + # Only add connection if both UIDs exist if preceding_uid is not None and following_uid is not None: uid_reaction_connections_data.append({ - "preceding_uid": preceding_uid, + "preceding_uid": preceding_uid, "following_uid": following_uid }) - - return pd.DataFrame(uid_reaction_connections_data) + # Fix: Always create DataFrame with proper columns, even if empty + if uid_reaction_connections_data: + return pd.DataFrame(uid_reaction_connections_data) + else: + # Return empty DataFrame with correct column structure + return pd.DataFrame(columns=["preceding_uid", "following_uid"]) def _execute_regulator_query( graph: Graph, @@ -213,29 +217,29 @@ def get_negative_regulators_for_reaction( ) -def _get_non_null_values(df: pd.DataFrame, column: str) -> List[Any]: +def get_non_null_values(df: pd.DataFrame, column: str) -> List[Any]: """Extract non-null values from a DataFrame column.""" return [value for value in df[column].tolist() if pd.notna(value)] -def _get_hash_for_reaction(reaction_id_map: pd.DataFrame, uid: str, hash_type: str) -> str: +def get_hash_for_reaction(reaction_id_map: pd.DataFrame, uid: str, hash_type: str) -> str: """Get input_hash or output_hash for a given reaction UID.""" return 
reaction_id_map.loc[ reaction_id_map["uid"] == uid, hash_type ].iloc[0] -def _extract_uid_and_reactome_values(decomposed_uid_mapping: pd.DataFrame, hash_value: str) -> tuple: +def extract_uid_and_reactome_values(decomposed_uid_mapping: pd.DataFrame, hash_value: str) -> tuple: """Extract UID and Reactome ID values for a given hash.""" filtered_rows = decomposed_uid_mapping[decomposed_uid_mapping["uid"] == hash_value] - uid_values = _get_non_null_values(filtered_rows, "input_or_output_uid") - reactome_id_values = _get_non_null_values(filtered_rows, "input_or_output_reactome_id") + uid_values = get_non_null_values(filtered_rows, "input_or_output_uid") + reactome_id_values = get_non_null_values(filtered_rows, "input_or_output_reactome_id") return uid_values, reactome_id_values -def _assign_uuids(reactome_ids: List[str], reactome_id_to_uuid: Dict[str, str]) -> List[str]: +def assign_uuids(reactome_ids: List[str], reactome_id_to_uuid: Dict[str, str]) -> List[str]: """Assign UUIDs to Reactome IDs, creating new ones if they don't exist.""" return [ reactome_id_to_uuid.setdefault(reactome_id, str(uuid.uuid4())) @@ -243,7 +247,7 @@ def _assign_uuids(reactome_ids: List[str], reactome_id_to_uuid: Dict[str, str]) ] -def _determine_edge_properties(input_uid_values: List[Any]) -> tuple: +def determine_edge_properties(input_uid_values: List[Any]) -> tuple: """Determine and_or and edge_type based on input UID values.""" if input_uid_values: return "and", "input" @@ -251,7 +255,7 @@ def _determine_edge_properties(input_uid_values: List[Any]) -> tuple: return "or", "output" -def _add_pathway_connections( +def add_pathway_connections( input_uuids: List[str], output_uuids: List[str], and_or: str, @@ -283,8 +287,8 @@ def extract_inputs_and_outputs( for reaction_uid in reaction_uids: # Extract input information - input_hash = _get_hash_for_reaction(reaction_id_map, reaction_uid, "input_hash") - input_uid_values, input_reactome_id_values = _extract_uid_and_reactome_values( + input_hash = 
get_hash_for_reaction(reaction_id_map, reaction_uid, "input_hash") + input_uid_values, input_reactome_id_values = extract_uid_and_reactome_values( decomposed_uid_mapping, input_hash ) @@ -295,20 +299,20 @@ def extract_inputs_and_outputs( for preceding_uid in preceding_uids: # Extract output information - output_hash = _get_hash_for_reaction(reaction_id_map, preceding_uid, "output_hash") - output_uid_values, output_reactome_id_values = _extract_uid_and_reactome_values( + output_hash = get_hash_for_reaction(reaction_id_map, preceding_uid, "output_hash") + output_uid_values, output_reactome_id_values = extract_uid_and_reactome_values( decomposed_uid_mapping, output_hash ) # Assign UUIDs - input_uuids = _assign_uuids(input_reactome_id_values, reactome_id_to_uuid) - output_uuids = _assign_uuids(output_reactome_id_values, reactome_id_to_uuid) + input_uuids = assign_uuids(input_reactome_id_values, reactome_id_to_uuid) + output_uuids = assign_uuids(output_reactome_id_values, reactome_id_to_uuid) # Determine edge properties - and_or, edge_type = _determine_edge_properties(input_uid_values) + and_or, edge_type = determine_edge_properties(input_uid_values) # Add connections to pathway network - _add_pathway_connections( + add_pathway_connections( input_uuids, output_uuids, and_or, edge_type, pathway_logic_network_data ) diff --git a/src/pathway_generator.py b/src/pathway_generator.py index 53440e0..57322b4 100755 --- a/src/pathway_generator.py +++ b/src/pathway_generator.py @@ -43,7 +43,7 @@ def generate_pathway_file( pathway_id, reaction_connections ) best_matches = pd.DataFrame( - best_matches_list, columns=["incomming", "outgoing"] + best_matches_list, columns=["incoming", "outgoing"] ) decomposed_uid_mapping.to_csv(decomposed_uid_mapping_file, index=False) best_matches.to_csv(best_matches_file, index=False) diff --git a/src/reaction_generator.py b/src/reaction_generator.py index ba5fc79..b2e8f2d 100755 --- a/src/reaction_generator.py +++ b/src/reaction_generator.py @@ 
-2,7 +2,7 @@ import itertools import uuid import warnings -from typing import Any, Dict, List, Set, Tuple +from typing import Any, Dict, List, Set, Tuple, Optional import pandas as pd @@ -31,6 +31,28 @@ ReactomeID = str DataFrameRow = Dict[str, Any] +# Enhanced functional equivalence groups configuration +FUNCTIONAL_EQUIVALENT_GROUPS = { + # Ubiquitin - classic case of functionally equivalent isoforms + "68524": { + "group_name": "ubiquitin_isoforms", + "strategy": "use_canonical", + "canonical_representative": "68524", + "description": "Ubiquitin isoforms - all serve identical protein degradation signaling function", + "biological_rationale": "Multiple ubiquitin genes produce identical/near-identical proteins" + }, + + # Template for other functional groups + # "entity_id": { + # "group_name": "group_name", + # "strategy": "use_canonical", # or "representative_sampling", "context_dependent" + # "canonical_representative": "representative_id", + # "description": "Brief description", + # "biological_rationale": "Why these are functionally equivalent" + # } +} + + decomposed_uid_mapping = pd.DataFrame( columns=decomposed_uid_mapping_column_types.keys() ).astype( # type: ignore @@ -41,8 +63,6 @@ def get_component_id_or_reference_entity_id(reactome_id): - global reference_entity_dict - if reactome_id in reference_entity_dict: component_id = reference_entity_dict[reactome_id] return component_id @@ -61,6 +81,26 @@ def is_valid_uuid(identifier: Any) -> bool: return True if len(identifier) == 64 else False +def is_functionally_equivalent_entity(entity_id: str) -> Optional[Dict[str, Any]]: + """Check if entity is part of a functional equivalence group.""" + return FUNCTIONAL_EQUIVALENT_GROUPS.get(entity_id) + + +def apply_functional_equivalence_strategy( + entity_id: str, + functional_group: Dict[str, Any], + original_result: Set[str] +) -> Set[str]: + """Apply functional equivalence strategy to entity decomposition.""" + strategy = functional_group.get("strategy", 
"use_canonical") + + if strategy == "use_canonical": + canonical_rep = functional_group.get("canonical_representative", entity_id) + return {canonical_rep} + else: + return original_result + + def get_broken_apart_ids( broken_apart_members: list[set[str]], reactome_id: ReactomeID ) -> Set[UID]: @@ -77,16 +117,20 @@ def get_broken_apart_ids( new_broken_apart_members.append({member}) iterproduct_components = list(itertools.product(*new_broken_apart_members)) + total_combinations = len(iterproduct_components) + + if total_combinations > 1000: + logger.warning(f"Large combination set for reaction {reactome_id}: {total_combinations} combinations") + iterproduct_components_as_sets = [ set(map(str, item)) for item in iterproduct_components ] - uids = get_uids_for_iterproduct_components( - iterproduct_components_as_sets, reactome_id - ) + uids = get_uids_for_iterproduct_components(iterproduct_components_as_sets, reactome_id) else: uid = str(uuid.uuid4()) rows: List[DataFrameRow] = [] row: DataFrameRow + for member in broken_apart_members: if is_valid_uuid(member): component_ids = decomposed_uid_mapping.loc[ @@ -110,13 +154,14 @@ def get_broken_apart_ids( "component_id": member, "reactome_id": reactome_id, "component_id_or_reference_entity_id": get_component_id_or_reference_entity_id( - component_id + member # Fixed: was using undefined 'component_id' ), "input_or_output_uid": None, "input_or_output_reactome_id": member, } rows.append(row) uids = {uid} + decomposed_uid_mapping = pd.concat([decomposed_uid_mapping, pd.DataFrame(rows)]) return uids @@ -131,6 +176,7 @@ def get_uids_for_iterproduct_components( uids: Set[UID] = set() for component in iterproduct_components: component_to_input_or_output: Dict[ComponentID, InputOutputID] = {} + for item in component: if is_valid_uuid(item): selected_rows = decomposed_uid_mapping.loc[ @@ -173,7 +219,23 @@ def get_uids_for_iterproduct_components( def break_apart_entity(entity_id: int) -> Set[str]: - """Break apart entity.""" + 
"""Break apart entity with functional equivalence support.""" + global decomposed_uid_mapping + + entity_str = str(entity_id) + + # Check for functional equivalence rules first + functional_group = is_functionally_equivalent_entity(entity_str) + if functional_group: + original_result = break_apart_entity_standard(entity_id) + return apply_functional_equivalence_strategy(entity_str, functional_group, original_result) + + # Use original decomposition logic + return break_apart_entity_standard(entity_id) + + +def break_apart_entity_standard(entity_id: int) -> Set[str]: + """Standard break_apart_entity logic.""" global decomposed_uid_mapping labels = get_labels(entity_id) @@ -197,12 +259,12 @@ def break_apart_entity(entity_id: int) -> Set[str]: contains_thing = contains_reference_gene_product_molecule_or_isoform(entity_id) if contains_thing: return set([str(entity_id)]) + member_ids = get_set_members(entity_id) member_list: List[str] = [] for member_id in member_ids: members = break_apart_entity(member_id) - if isinstance(members, set): member_list.extend(members) else: @@ -232,7 +294,6 @@ def break_apart_entity(entity_id: int) -> Set[str]: "SimpleEntity", ] ): - return {str(entity_id)} else: @@ -277,7 +338,10 @@ def get_decomposed_uid_mapping( """Get decomposed UID mapping.""" global decomposed_uid_mapping - decomposed_uid_mapping.drop(decomposed_uid_mapping.index, inplace=True) + # Fix: Properly reinitialize instead of dropping rows + decomposed_uid_mapping = pd.DataFrame( + columns=decomposed_uid_mapping_column_types.keys() + ).astype(decomposed_uid_mapping_column_types) reaction_ids = pd.unique( reaction_connections[ From cba91a812b9e07cbe90bf6000830880ec30dea3e Mon Sep 17 00:00:00 2001 From: Thomas Barbazuk Date: Mon, 4 Aug 2025 18:44:27 +0200 Subject: [PATCH 2/4] added in stoichiomtery. 
Runs well, more changes fortcoming --- src/decomposed_uid_mapping.py | 3 + src/logic_network_generator.py | 105 ++++++++++++++++++++---- src/neo4j_connector.py | 46 ++++++++--- src/reaction_generator.py | 145 ++++++++++++++++++++++++--------- 4 files changed, 233 insertions(+), 66 deletions(-) diff --git a/src/decomposed_uid_mapping.py b/src/decomposed_uid_mapping.py index 384f0e5..85fd5b0 100644 --- a/src/decomposed_uid_mapping.py +++ b/src/decomposed_uid_mapping.py @@ -7,4 +7,7 @@ "component_id_or_reference_entity_id": pd.Int64Dtype(), "input_or_output_uid": str, "input_or_output_reactome_id": pd.Int64Dtype(), + "stoichiometry": pd.Int64Dtype(), + "entity_stoichiometry": pd.Int64Dtype(), # For original entity stoichiometry + "component_stoichiometry": pd.Int64Dtype(), # For complex component stoichiometry } diff --git a/src/logic_network_generator.py b/src/logic_network_generator.py index 8440046..01f9edc 100755 --- a/src/logic_network_generator.py +++ b/src/logic_network_generator.py @@ -122,7 +122,7 @@ def _execute_regulator_query( def get_catalysts_for_reaction(reaction_id_map: DataFrame, graph: Graph) -> DataFrame: - """Get catalysts for reactions using Neo4j graph queries.""" + """Get catalysts for reactions with stoichiometry using Neo4j graph queries.""" catalyst_list = [] for _, row in reaction_id_map.iterrows(): @@ -130,8 +130,9 @@ def get_catalysts_for_reaction(reaction_id_map: DataFrame, graph: Graph) -> Data reaction_uuid = row["uid"] query = ( - f"MATCH (reaction:ReactionLikeEvent{{dbId: {reaction_id}}})-[:catalystActivity]->(catalystActivity:CatalystActivity)-[:physicalEntity]->(catalyst:PhysicalEntity) " - f"RETURN reaction.dbId AS reaction_id, catalyst.dbId AS catalyst_id, 'catalyst' AS edge_type" + f"MATCH (reaction:ReactionLikeEvent{{dbId: {reaction_id}}})-[r:catalystActivity]->(catalystActivity:CatalystActivity)-[:physicalEntity]->(catalyst:PhysicalEntity) " + f"RETURN reaction.dbId AS reaction_id, catalyst.dbId AS catalyst_id, " + 
f"COALESCE(r.stoichiometry, 1) AS stoichiometry, 'catalyst' AS edge_type" ) try: @@ -147,7 +148,7 @@ def get_catalysts_for_reaction(reaction_id_map: DataFrame, graph: Graph) -> Data return pd.DataFrame( catalyst_list, - columns=["reaction_id", "catalyst_id", "edge_type", "uuid", "reaction_uuid"], + columns=["reaction_id", "catalyst_id", "stoichiometry", "edge_type", "uuid", "reaction_uuid"], ) @@ -283,27 +284,37 @@ def extract_inputs_and_outputs( reactome_id_to_uuid: Dict[str, str], pathway_logic_network_data: List[Dict[str, Any]], ) -> None: - """Extract inputs and outputs for reactions and add them to the pathway network.""" + """Extract inputs and outputs with stoichiometry support.""" for reaction_uid in reaction_uids: - # Extract input information + # Extract input information WITH stoichiometry input_hash = get_hash_for_reaction(reaction_id_map, reaction_uid, "input_hash") input_uid_values, input_reactome_id_values = extract_uid_and_reactome_values( decomposed_uid_mapping, input_hash ) + # NEW: Extract stoichiometry for inputs + input_stoichiometry = extract_stoichiometry_for_entity( + decomposed_uid_mapping, input_hash, input_reactome_id_values + ) + # Process preceding reactions (outputs) preceding_uids = uid_reaction_connections[ uid_reaction_connections["following_uid"] == reaction_uid ]["preceding_uid"].tolist() for preceding_uid in preceding_uids: - # Extract output information + # Extract output information WITH stoichiometry output_hash = get_hash_for_reaction(reaction_id_map, preceding_uid, "output_hash") output_uid_values, output_reactome_id_values = extract_uid_and_reactome_values( decomposed_uid_mapping, output_hash ) + # NEW: Extract stoichiometry for outputs + output_stoichiometry = extract_stoichiometry_for_entity( + decomposed_uid_mapping, output_hash, output_reactome_id_values + ) + # Assign UUIDs input_uuids = assign_uuids(input_reactome_id_values, reactome_id_to_uuid) output_uuids = assign_uuids(output_reactome_id_values, 
reactome_id_to_uuid) @@ -311,9 +322,11 @@ def extract_inputs_and_outputs( # Determine edge properties and_or, edge_type = determine_edge_properties(input_uid_values) - # Add connections to pathway network - add_pathway_connections( - input_uuids, output_uuids, and_or, edge_type, pathway_logic_network_data + # Add connections with stoichiometry + add_pathway_connections_with_stoichiometry( + input_uuids, output_uuids, + input_stoichiometry, output_stoichiometry, + and_or, edge_type, pathway_logic_network_data ) @@ -326,25 +339,29 @@ def append_regulators( and_or: str, edge_type: str, ) -> None: - """Append regulatory relationships to the pathway network.""" + """Append regulatory relationships with stoichiometry.""" regulator_configs = [ (catalyst_map, "pos", "catalyst"), - (negative_regulator_map, "neg", "regulator"), + (negative_regulator_map, "neg", "regulator"), (positive_regulator_map, "pos", "regulator"), ] for map_df, pos_neg, edge_type_override in regulator_configs: for _, row in map_df.iterrows(): + stoichiometry = row.get("stoichiometry", 1) # Get stoichiometry if available + pathway_logic_network_data.append({ "source_id": row["uuid"], "target_id": row["reaction_uuid"], "pos_neg": pos_neg, "and_or": and_or, "edge_type": edge_type_override, + "source_stoichiometry": stoichiometry, + "target_stoichiometry": 1, # Reactions are typically 1:1 with regulators + "stoichiometry_ratio": 1.0, }) - def _calculate_reaction_statistics(reaction_connections: pd.DataFrame) -> None: """Calculate and print statistics about reactions without preceding events.""" reactions_without_preceding_events = reaction_connections[ @@ -375,21 +392,75 @@ def _print_regulator_statistics( ) +def extract_stoichiometry_for_entity( + decomposed_uid_mapping: pd.DataFrame, + hash_value: str, + entity_reactome_ids: List[str] +) -> Dict[str, int]: + """Extract stoichiometry information for entities.""" + stoichiometry_map = {} + + for reactome_id in entity_reactome_ids: + # Get stoichiometry 
from decomposed mapping + filtered_rows = decomposed_uid_mapping[ + (decomposed_uid_mapping["uid"] == hash_value) & + (decomposed_uid_mapping["input_or_output_reactome_id"] == reactome_id) + ] + + if not filtered_rows.empty: + stoichiometry = filtered_rows["stoichiometry"].iloc[0] + stoichiometry_map[reactome_id] = stoichiometry if pd.notna(stoichiometry) else 1 + else: + stoichiometry_map[reactome_id] = 1 + + return stoichiometry_map + +def add_pathway_connections_with_stoichiometry( + input_uuids: List[str], + output_uuids: List[str], + input_stoichiometry: Dict[str, int], + output_stoichiometry: Dict[str, int], + and_or: str, + edge_type: str, + pathway_logic_network_data: List[Dict[str, Any]] +) -> None: + """Add connections with stoichiometry information.""" + + for input_uuid in input_uuids: + for output_uuid in output_uuids: + # Calculate stoichiometry ratio + input_stoich = input_stoichiometry.get(input_uuid, 1) + output_stoich = output_stoichiometry.get(output_uuid, 1) + stoich_ratio = output_stoich / input_stoich if input_stoich > 0 else 1.0 + + pathway_logic_network_data.append({ + "source_id": input_uuid, + "target_id": output_uuid, + "pos_neg": "pos", + "and_or": and_or, + "edge_type": edge_type, + "source_stoichiometry": input_stoich, + "target_stoichiometry": output_stoich, + "stoichiometry_ratio": stoich_ratio, + }) + def create_pathway_logic_network( decomposed_uid_mapping: pd.DataFrame, reaction_connections: pd.DataFrame, best_matches: Any, ) -> pd.DataFrame: - """Create a pathway logic network from decomposed UID mappings and reaction connections.""" - logger.debug("Adding reaction pairs to pathway_logic_network") - - # Initialize data structures + """Create a pathway logic network with stoichiometry support.""" + + # Enhanced column structure columns = { "source_id": pd.Series(dtype="Int64"), "target_id": pd.Series(dtype="Int64"), "pos_neg": pd.Series(dtype="str"), "and_or": pd.Series(dtype="str"), "edge_type": pd.Series(dtype="str"), + 
"source_stoichiometry": pd.Series(dtype="Int64"), # NEW + "target_stoichiometry": pd.Series(dtype="Int64"), # NEW + "stoichiometry_ratio": pd.Series(dtype="float"), # NEW } pathway_logic_network_data = [] diff --git a/src/neo4j_connector.py b/src/neo4j_connector.py index 66bf4fb..502afc4 100755 --- a/src/neo4j_connector.py +++ b/src/neo4j_connector.py @@ -65,21 +65,23 @@ def get_labels(entity_id: int) -> List[str]: raise -def get_complex_components(entity_id: int) -> Set[int]: +def get_complex_components_with_stoichiometry(entity_id: int) -> List[Dict[str, Any]]: query_get_components_template: str = """ - MATCH (entity)-[:hasComponent]->(component) + MATCH (entity)-[r:hasComponent]->(component) WHERE entity.dbId = %s - RETURN collect(component.dbId) AS component_ids + RETURN component.dbId AS component_id, + COALESCE(r.stoichiometry, 1) AS stoichiometry """ query: str = query_get_components_template % entity_id try: - return set(graph.run(query).data()[0]["component_ids"]) + result = graph.run(query).data() + print(f"Complex {entity_id} components with stoichiometry: {result}") + return result except Exception: - logger.error("Error in get_complex_components", exc_info=True) + logger.error("Error in get_complex_components_with_stoichiometry", exc_info=True) raise - def get_set_members(entity_id: int) -> Set[int]: query_get_members_template: str = """ MATCH (entity)-[:hasCandidate|hasMember]->(member) @@ -111,22 +113,24 @@ def get_reactions(pathway_id: int, taxon_id: str) -> List[int]: raise -def get_reaction_input_output_ids(reaction_id: int, input_or_output: str) -> Set[int]: +def get_reaction_input_output_ids_with_stoichiometry(reaction_id: int, input_or_output: str) -> List[Dict[str, Any]]: query_template: str = """ - MATCH (reaction)-[:%s]-(io) + MATCH (reaction)-[r:%s]->(io) WHERE (reaction:Reaction OR reaction:ReactionLikeEvent) AND reaction.dbId=%s - RETURN COLLECT(io.dbId) AS io_ids + RETURN io.dbId AS entity_id, + COALESCE(r.stoichiometry, 1) AS 
stoichiometry """ relation_type: str = "input" if input_or_output == "input" else "output" query: str = query_template % (relation_type, reaction_id) try: - return set(graph.run(query).data()[0]["io_ids"]) + result = graph.run(query).data() + print(f"Reaction {reaction_id} {input_or_output}s with stoichiometry: {result}") + return result except Exception: - logger.error("Error in get_reaction_input_output_ids", exc_info=True) + logger.error("Error in get_reaction_input_output_ids_with_stoichiometry", exc_info=True) raise - def get_reference_entity_id(entity_id: int) -> Union[str, None]: query_template: str = """ MATCH (reference_database:ReferenceDatabase)<-[:referenceDatabase]-(reference_entity_gene:ReferenceEntity)<-[:referenceGene]-(reference_entity:ReferenceEntity)<-[:referenceEntity]-(pe:PhysicalEntity) @@ -164,3 +168,21 @@ def contains_reference_gene_product_molecule_or_isoform(entity_id: int) -> bool: exc_info=True, ) raise e + + +def get_catalyst_stoichiometry(reaction_id: int) -> List[Dict[str, Any]]: + query_template: str = """ + MATCH (reaction:ReactionLikeEvent)-[r:catalystActivity]->(ca:CatalystActivity)-[:physicalEntity]->(catalyst:PhysicalEntity) + WHERE reaction.dbId = %s + RETURN catalyst.dbId AS catalyst_id, + COALESCE(r.stoichiometry, 1) AS stoichiometry + """ + query: str = query_template % reaction_id + + try: + result = graph.run(query).data() + print(f"Reaction {reaction_id} catalysts with stoichiometry: {result}") + return result + except Exception: + logger.error("Error in get_catalyst_stoichiometry", exc_info=True) + raise diff --git a/src/reaction_generator.py b/src/reaction_generator.py index b2e8f2d..5bfd287 100755 --- a/src/reaction_generator.py +++ b/src/reaction_generator.py @@ -11,9 +11,9 @@ from src.decomposed_uid_mapping import decomposed_uid_mapping_column_types from src.neo4j_connector import ( contains_reference_gene_product_molecule_or_isoform, - get_complex_components, + get_complex_components_with_stoichiometry, # MODIFIED 
IMPORT get_labels, - get_reaction_input_output_ids, + get_reaction_input_output_ids_with_stoichiometry, # MODIFIED IMPORT get_reference_entity_id, get_set_members, ) @@ -41,23 +41,19 @@ "description": "Ubiquitin isoforms - all serve identical protein degradation signaling function", "biological_rationale": "Multiple ubiquitin genes produce identical/near-identical proteins" }, - - # Template for other functional groups - # "entity_id": { - # "group_name": "group_name", - # "strategy": "use_canonical", # or "representative_sampling", "context_dependent" - # "canonical_representative": "representative_id", - # "description": "Brief description", - # "biological_rationale": "Why these are functionally equivalent" - # } } +# MODIFIED: Add stoichiometry columns to the schema +enhanced_decomposed_uid_mapping_column_types = { + **decomposed_uid_mapping_column_types, + "stoichiometry": pd.Int64Dtype(), + "entity_stoichiometry": pd.Int64Dtype(), + "component_stoichiometry": pd.Int64Dtype(), +} decomposed_uid_mapping = pd.DataFrame( - columns=decomposed_uid_mapping_column_types.keys() -).astype( # type: ignore - decomposed_uid_mapping_column_types -) + columns=enhanced_decomposed_uid_mapping_column_types.keys() +).astype(enhanced_decomposed_uid_mapping_column_types) reference_entity_dict: Dict[str, str] = {} @@ -102,11 +98,16 @@ def apply_functional_equivalence_strategy( def get_broken_apart_ids( - broken_apart_members: list[set[str]], reactome_id: ReactomeID + broken_apart_members: list[set[str]], + reactome_id: ReactomeID, + stoichiometry_map: Dict[str, int] = None # NEW PARAMETER ) -> Set[UID]: - """Get broken apart IDs.""" + """Get broken apart IDs with stoichiometry support.""" global decomposed_uid_mapping + if stoichiometry_map is None: + stoichiometry_map = {} + uids: Set[UID] if any(isinstance(member, set) for member in broken_apart_members): new_broken_apart_members = [] @@ -125,7 +126,9 @@ def get_broken_apart_ids( iterproduct_components_as_sets = [ set(map(str, 
item)) for item in iterproduct_components ] - uids = get_uids_for_iterproduct_components(iterproduct_components_as_sets, reactome_id) + uids = get_uids_for_iterproduct_components( + iterproduct_components_as_sets, reactome_id, stoichiometry_map + ) else: uid = str(uuid.uuid4()) rows: List[DataFrameRow] = [] @@ -137,6 +140,9 @@ def get_broken_apart_ids( decomposed_uid_mapping["uid"] == member, "component_id" ].tolist() for component_id in component_ids: + # NEW: Add stoichiometry information + stoichiometry = stoichiometry_map.get(component_id, 1) + row = { "uid": uid, "component_id": component_id, @@ -146,18 +152,27 @@ def get_broken_apart_ids( ), "input_or_output_uid": member, "input_or_output_reactome_id": None, + "stoichiometry": stoichiometry, # NEW + "entity_stoichiometry": stoichiometry, # NEW + "component_stoichiometry": 1, # NEW } rows.append(row) else: + # NEW: Add stoichiometry information + stoichiometry = stoichiometry_map.get(member, 1) + row = { "uid": uid, "component_id": member, "reactome_id": reactome_id, "component_id_or_reference_entity_id": get_component_id_or_reference_entity_id( - member # Fixed: was using undefined 'component_id' + member ), "input_or_output_uid": None, "input_or_output_reactome_id": member, + "stoichiometry": stoichiometry, # NEW + "entity_stoichiometry": stoichiometry, # NEW + "component_stoichiometry": 1, # NEW } rows.append(row) uids = {uid} @@ -168,14 +183,20 @@ def get_broken_apart_ids( def get_uids_for_iterproduct_components( - iterproduct_components: List[Set[ComponentID]], reactome_id: ReactomeID + iterproduct_components: List[Set[ComponentID]], + reactome_id: ReactomeID, + stoichiometry_map: Dict[str, int] = None # NEW PARAMETER ) -> Set[UID]: - """Get UID for iterproduct components.""" + """Get UID for iterproduct components with stoichiometry support.""" global decomposed_uid_mapping + if stoichiometry_map is None: + stoichiometry_map = {} + uids: Set[UID] = set() for component in iterproduct_components: 
component_to_input_or_output: Dict[ComponentID, InputOutputID] = {} + component_stoichiometries: Dict[ComponentID, int] = {} # NEW for item in component: if is_valid_uuid(item): @@ -185,12 +206,17 @@ def get_uids_for_iterproduct_components( for index, selected_row in selected_rows.iterrows(): component_id = selected_row["component_id"] component_to_input_or_output[component_id] = item + # NEW: Get stoichiometry from existing data + component_stoichiometries[component_id] = selected_row.get("stoichiometry", 1) else: component_to_input_or_output[item] = item + # NEW: Get stoichiometry from the map + component_stoichiometries[item] = stoichiometry_map.get(item, 1) - uid = hashlib.sha256( - str(sorted(component_to_input_or_output.values())).encode() - ).hexdigest() + # MODIFIED: Include stoichiometry in UID calculation + uid_data = [(comp, io, component_stoichiometries.get(comp, 1)) + for comp, io in sorted(component_to_input_or_output.items())] + uid = hashlib.sha256(str(uid_data).encode()).hexdigest() rows: List[DataFrameRow] = [] for component_id, input_or_output_id in component_to_input_or_output.items(): @@ -200,6 +226,10 @@ def get_uids_for_iterproduct_components( input_or_output_reactome_id = ( input_or_output_id if not is_valid_uuid(input_or_output_id) else None ) + + # NEW: Get stoichiometry information + stoichiometry = component_stoichiometries.get(component_id, 1) + row: DataFrameRow = { "uid": uid, "component_id": component_id, @@ -209,6 +239,9 @@ def get_uids_for_iterproduct_components( ), "input_or_output_uid": input_or_output_uid, "input_or_output_reactome_id": input_or_output_reactome_id, + "stoichiometry": stoichiometry, # NEW + "entity_stoichiometry": stoichiometry, # NEW + "component_stoichiometry": stoichiometry, # NEW } rows.append(row) @@ -235,7 +268,7 @@ def break_apart_entity(entity_id: int) -> Set[str]: def break_apart_entity_standard(entity_id: int) -> Set[str]: - """Standard break_apart_entity logic.""" + """Standard break_apart_entity logic 
with stoichiometry support.""" global decomposed_uid_mapping labels = get_labels(entity_id) @@ -274,13 +307,27 @@ def break_apart_entity_standard(entity_id: int) -> Set[str]: elif "Complex" in labels: broken_apart_members: List[Set[str]] = [] - member_ids = get_complex_components(entity_id) + + # MODIFIED: Get components with stoichiometry + component_data = get_complex_components_with_stoichiometry(entity_id) + print(f"Complex {entity_id} component data: {component_data}") + + component_stoichiometry = {} + member_ids = [] + + for component_info in component_data: + component_id = component_info['component_id'] + stoichiometry = component_info['stoichiometry'] + component_stoichiometry[str(component_id)] = stoichiometry + member_ids.append(component_id) for member_id in member_ids: members = break_apart_entity(member_id) broken_apart_members.append(members) - return get_broken_apart_ids(broken_apart_members, str(entity_id)) + return get_broken_apart_ids( + broken_apart_members, str(entity_id), component_stoichiometry + ) elif any( entity_label in labels @@ -302,25 +349,41 @@ def break_apart_entity_standard(entity_id: int) -> Set[str]: def decompose_by_reactions(reaction_ids: List[int]) -> List[Any]: - """Decompose by reactions.""" + """Decompose by reactions with stoichiometry support.""" global decomposed_uid_mapping - logger.debug("Decomposing reactions") + logger.debug("Decomposing reactions with stoichiometry") all_best_matches = [] for reaction_id in reaction_ids: - input_ids = get_reaction_input_output_ids(reaction_id, "input") + print(f"Processing reaction {reaction_id}") + + # MODIFIED: Get inputs with stoichiometry + input_data = get_reaction_input_output_ids_with_stoichiometry(reaction_id, "input") + input_ids = [item['entity_id'] for item in input_data] + input_stoichiometry = {str(item['entity_id']): item['stoichiometry'] for item in input_data} + + print(f"Reaction {reaction_id} inputs: {input_ids}") + print(f"Input stoichiometry: 
{input_stoichiometry}") + broken_apart_input_id = [break_apart_entity(input_id) for input_id in input_ids] input_combinations = get_broken_apart_ids( - broken_apart_input_id, str(reaction_id) + broken_apart_input_id, str(reaction_id), input_stoichiometry ) - output_ids = get_reaction_input_output_ids(reaction_id, "output") + # MODIFIED: Get outputs with stoichiometry + output_data = get_reaction_input_output_ids_with_stoichiometry(reaction_id, "output") + output_ids = [item['entity_id'] for item in output_data] + output_stoichiometry = {str(item['entity_id']): item['stoichiometry'] for item in output_data} + + print(f"Reaction {reaction_id} outputs: {output_ids}") + print(f"Output stoichiometry: {output_stoichiometry}") + broken_apart_output_id = [ break_apart_entity(output_id) for output_id in output_ids ] output_combinations = get_broken_apart_ids( - broken_apart_output_id, str(reaction_id) + broken_apart_output_id, str(reaction_id), output_stoichiometry ) [best_matches, _] = find_best_reaction_match( @@ -335,13 +398,13 @@ def decompose_by_reactions(reaction_ids: List[int]) -> List[Any]: def get_decomposed_uid_mapping( pathway_id: str, reaction_connections: pd.DataFrame ) -> Tuple[pd.DataFrame, List[Any]]: - """Get decomposed UID mapping.""" + """Get decomposed UID mapping with stoichiometry support.""" global decomposed_uid_mapping - # Fix: Properly reinitialize instead of dropping rows + # MODIFIED: Use enhanced column types decomposed_uid_mapping = pd.DataFrame( - columns=decomposed_uid_mapping_column_types.keys() - ).astype(decomposed_uid_mapping_column_types) + columns=enhanced_decomposed_uid_mapping_column_types.keys() + ).astype(enhanced_decomposed_uid_mapping_column_types) reaction_ids = pd.unique( reaction_connections[ @@ -351,6 +414,14 @@ def get_decomposed_uid_mapping( reaction_ids = reaction_ids[~pd.isna(reaction_ids)] # removing NA value from list reaction_ids = reaction_ids.astype(int).tolist() # converting to integer + + print(f"Processing 
{len(reaction_ids)} reactions for pathway {pathway_id}") + best_matches = decompose_by_reactions(list(reaction_ids)) + + print(f"Final decomposed_uid_mapping shape: {decomposed_uid_mapping.shape}") + print(f"Stoichiometry column summary:") + if 'stoichiometry' in decomposed_uid_mapping.columns: + print(decomposed_uid_mapping['stoichiometry'].value_counts()) return (decomposed_uid_mapping, best_matches) From 666e67d3f82a05ee4beeeaa08208e0d91c10cb80 Mon Sep 17 00:00:00 2001 From: Thomas Barbazuk Date: Mon, 4 Aug 2025 18:48:28 +0200 Subject: [PATCH 3/4] added in stoichiomtery. Runs well, more changes fortcoming --- src/logic_network_generator.py | 348 +++++++++++++++++++-------------- src/neo4j_connector.py | 14 +- src/pathway_generator.py | 4 +- src/reaction_generator.py | 100 ++++++---- 4 files changed, 267 insertions(+), 199 deletions(-) diff --git a/src/logic_network_generator.py b/src/logic_network_generator.py index 01f9edc..b767ef6 100755 --- a/src/logic_network_generator.py +++ b/src/logic_network_generator.py @@ -10,7 +10,9 @@ graph: Graph = Graph(uri, auth=("neo4j", "test")) -def get_reactome_id_from_hash(decomposed_uid_mapping: pd.DataFrame, hash_value: str) -> int: +def get_reactome_id_from_hash( + decomposed_uid_mapping: pd.DataFrame, hash_value: str +) -> int: """Extract reactome_id for a given hash from decomposed_uid_mapping.""" return decomposed_uid_mapping.loc[ decomposed_uid_mapping["uid"] == hash_value, "reactome_id" @@ -18,27 +20,27 @@ def get_reactome_id_from_hash(decomposed_uid_mapping: pd.DataFrame, hash_value: def create_reaction_id_map( - decomposed_uid_mapping: pd.DataFrame, - reaction_ids: List[int], - best_matches: pd.DataFrame + decomposed_uid_mapping: pd.DataFrame, + reaction_ids: List[int], + best_matches: pd.DataFrame, ) -> pd.DataFrame: """Create a mapping between reaction UIDs, reactome IDs, and input/output hashes.""" - + reaction_id_map_column_types = { "uid": str, "reactome_id": pd.Int64Dtype(), "input_hash": str, "output_hash": 
str, } - + print("Checking best_matches contents:") - + rows = [] for _, match in best_matches.iterrows(): incoming_hash = match["incoming"] outgoing_hash = match["outgoing"] reactome_id = get_reactome_id_from_hash(decomposed_uid_mapping, incoming_hash) - + row = { "uid": str(uuid.uuid4()), "reactome_id": int(reactome_id), @@ -48,23 +50,23 @@ def create_reaction_id_map( print("row") print(row) rows.append(row) - + reaction_id_map = pd.DataFrame(rows).astype(reaction_id_map_column_types) - + return reaction_id_map def create_uid_reaction_connections( reaction_id_map: pd.DataFrame, best_matches: pd.DataFrame, - decomposed_uid_mapping: pd.DataFrame + decomposed_uid_mapping: pd.DataFrame, ) -> pd.DataFrame: """Create connections between reaction UIDs based on best matches.""" reactome_id_to_uid_mapping = dict( zip(reaction_id_map["reactome_id"], reaction_id_map["uid"]) ) - + uid_reaction_connections_data = [] for _, match in best_matches.iterrows(): @@ -72,8 +74,12 @@ def create_uid_reaction_connections( outgoing_hash = match["outgoing"] # Get reactome IDs for both hashes - preceding_reaction_id = get_reactome_id_from_hash(decomposed_uid_mapping, incoming_hash) - following_reaction_id = get_reactome_id_from_hash(decomposed_uid_mapping, outgoing_hash) + preceding_reaction_id = get_reactome_id_from_hash( + decomposed_uid_mapping, incoming_hash + ) + following_reaction_id = get_reactome_id_from_hash( + decomposed_uid_mapping, outgoing_hash + ) # Get corresponding UIDs preceding_uid = reactome_id_to_uid_mapping.get(preceding_reaction_id) @@ -81,10 +87,9 @@ def create_uid_reaction_connections( # Only add connection if both UIDs exist if preceding_uid is not None and following_uid is not None: - uid_reaction_connections_data.append({ - "preceding_uid": preceding_uid, - "following_uid": following_uid - }) + uid_reaction_connections_data.append( + {"preceding_uid": preceding_uid, "following_uid": following_uid} + ) # Fix: Always create DataFrame with proper columns, even if 
empty if uid_reaction_connections_data: @@ -93,29 +98,29 @@ def create_uid_reaction_connections( # Return empty DataFrame with correct column structure return pd.DataFrame(columns=["preceding_uid", "following_uid"]) + def _execute_regulator_query( - graph: Graph, - query: str, - reaction_uuid: str, - function_name: str + graph: Graph, query: str, reaction_uuid: str, function_name: str ) -> List[Dict[str, Any]]: """Execute a regulator query and return processed results.""" try: result = graph.run(query) regulators = [] - + for record in result: regulator_uuid = str(uuid.uuid4()) - regulators.append({ - "reaction": reaction_uuid, - "PhysicalEntity": regulator_uuid, - "edge_type": "regulator", - "uuid": regulator_uuid, - "reaction_uuid": reaction_uuid, - }) - + regulators.append( + { + "reaction": reaction_uuid, + "PhysicalEntity": regulator_uuid, + "edge_type": "regulator", + "uuid": regulator_uuid, + "reaction_uuid": reaction_uuid, + } + ) + return regulators - + except Exception as e: logger.error(f"Error in {function_name}", exc_info=True) raise e @@ -124,60 +129,66 @@ def _execute_regulator_query( def get_catalysts_for_reaction(reaction_id_map: DataFrame, graph: Graph) -> DataFrame: """Get catalysts for reactions with stoichiometry using Neo4j graph queries.""" catalyst_list = [] - + for _, row in reaction_id_map.iterrows(): reaction_id = row["reactome_id"] reaction_uuid = row["uid"] - + query = ( f"MATCH (reaction:ReactionLikeEvent{{dbId: {reaction_id}}})-[r:catalystActivity]->(catalystActivity:CatalystActivity)-[:physicalEntity]->(catalyst:PhysicalEntity) " f"RETURN reaction.dbId AS reaction_id, catalyst.dbId AS catalyst_id, " f"COALESCE(r.stoichiometry, 1) AS stoichiometry, 'catalyst' AS edge_type" ) - + try: data = graph.run(query).data() for item in data: item["uuid"] = str(uuid.uuid4()) item["reaction_uuid"] = reaction_uuid catalyst_list.extend(data) - + except Exception as e: logger.error("Error in get_catalysts_for_reaction", exc_info=True) raise e - + 
return pd.DataFrame( catalyst_list, - columns=["reaction_id", "catalyst_id", "stoichiometry", "edge_type", "uuid", "reaction_uuid"], + columns=[ + "reaction_id", + "catalyst_id", + "stoichiometry", + "edge_type", + "uuid", + "reaction_uuid", + ], ) def get_positive_regulators_for_reaction( - reaction_id_mapping: DataFrame, - graph: Graph + reaction_id_mapping: DataFrame, graph: Graph ) -> DataFrame: """Get positive regulators for reactions using Neo4j graph queries.""" regulators_list = [] - + for _, row in reaction_id_mapping.iterrows(): reaction_id = row["reactome_id"] reaction_uuid = row["uid"] - + if pd.isna(reaction_uuid): logger.error(f"No UUID found for reaction ID {reaction_id}") continue - + query = ( f"MATCH (reaction)-[:regulatedBy]->(regulator:PositiveRegulation)-[:regulator]->(pe:PhysicalEntity) " f"WHERE reaction.dbId = {reaction_id} " "RETURN reaction.dbId as reaction, pe.dbId as PhysicalEntity" ) - + regulators = _execute_regulator_query( graph, query, reaction_uuid, "get_positive_regulators_for_reaction" ) regulators_list.extend(regulators) - + return pd.DataFrame( regulators_list, columns=["reaction", "PhysicalEntity", "edge_type", "uuid", "reaction_uuid"], @@ -186,31 +197,30 @@ def get_positive_regulators_for_reaction( def get_negative_regulators_for_reaction( - reaction_id_mapping: DataFrame, - graph: Graph + reaction_id_mapping: DataFrame, graph: Graph ) -> DataFrame: """Get negative regulators for reactions using Neo4j graph queries.""" regulators_list = [] - + for _, row in reaction_id_mapping.iterrows(): reaction_id = row["reactome_id"] reaction_uuid = row["uid"] - + if pd.isna(reaction_uuid): logger.error(f"No UUID found for reaction ID {reaction_id}") continue - + query = ( f"MATCH (reaction)-[:regulatedBy]->(regulator:NegativeRegulation)-[:regulator]->(pe:PhysicalEntity) " f"WHERE reaction.dbId = {reaction_id} " "RETURN reaction.dbId as reaction, pe.dbId as PhysicalEntity" ) - + regulators = _execute_regulator_query( graph, query, 
reaction_uuid, "get_negative_regulators_for_reaction" ) regulators_list.extend(regulators) - + return pd.DataFrame( regulators_list, columns=["reaction", "PhysicalEntity", "edge_type", "uuid", "reaction_uuid"], @@ -223,24 +233,30 @@ def get_non_null_values(df: pd.DataFrame, column: str) -> List[Any]: return [value for value in df[column].tolist() if pd.notna(value)] -def get_hash_for_reaction(reaction_id_map: pd.DataFrame, uid: str, hash_type: str) -> str: +def get_hash_for_reaction( + reaction_id_map: pd.DataFrame, uid: str, hash_type: str +) -> str: """Get input_hash or output_hash for a given reaction UID.""" - return reaction_id_map.loc[ - reaction_id_map["uid"] == uid, hash_type - ].iloc[0] + return reaction_id_map.loc[reaction_id_map["uid"] == uid, hash_type].iloc[0] -def extract_uid_and_reactome_values(decomposed_uid_mapping: pd.DataFrame, hash_value: str) -> tuple: +def extract_uid_and_reactome_values( + decomposed_uid_mapping: pd.DataFrame, hash_value: str +) -> tuple: """Extract UID and Reactome ID values for a given hash.""" filtered_rows = decomposed_uid_mapping[decomposed_uid_mapping["uid"] == hash_value] - + uid_values = get_non_null_values(filtered_rows, "input_or_output_uid") - reactome_id_values = get_non_null_values(filtered_rows, "input_or_output_reactome_id") - + reactome_id_values = get_non_null_values( + filtered_rows, "input_or_output_reactome_id" + ) + return uid_values, reactome_id_values -def assign_uuids(reactome_ids: List[str], reactome_id_to_uuid: Dict[str, str]) -> List[str]: +def assign_uuids( + reactome_ids: List[str], reactome_id_to_uuid: Dict[str, str] +) -> List[str]: """Assign UUIDs to Reactome IDs, creating new ones if they don't exist.""" return [ reactome_id_to_uuid.setdefault(reactome_id, str(uuid.uuid4())) @@ -257,22 +273,24 @@ def determine_edge_properties(input_uid_values: List[Any]) -> tuple: def add_pathway_connections( - input_uuids: List[str], - output_uuids: List[str], - and_or: str, + input_uuids: List[str], + 
output_uuids: List[str], + and_or: str, edge_type: str, - pathway_logic_network_data: List[Dict[str, Any]] + pathway_logic_network_data: List[Dict[str, Any]], ) -> None: """Add all input-output connections to the pathway network data.""" for input_uuid in input_uuids: for output_uuid in output_uuids: - pathway_logic_network_data.append({ - "source_id": input_uuid, - "target_id": output_uuid, - "pos_neg": "pos", - "and_or": and_or, - "edge_type": edge_type, - }) + pathway_logic_network_data.append( + { + "source_id": input_uuid, + "target_id": output_uuid, + "pos_neg": "pos", + "and_or": and_or, + "edge_type": edge_type, + } + ) def extract_inputs_and_outputs( @@ -285,48 +303,54 @@ def extract_inputs_and_outputs( pathway_logic_network_data: List[Dict[str, Any]], ) -> None: """Extract inputs and outputs with stoichiometry support.""" - + for reaction_uid in reaction_uids: # Extract input information WITH stoichiometry input_hash = get_hash_for_reaction(reaction_id_map, reaction_uid, "input_hash") input_uid_values, input_reactome_id_values = extract_uid_and_reactome_values( decomposed_uid_mapping, input_hash ) - + # NEW: Extract stoichiometry for inputs input_stoichiometry = extract_stoichiometry_for_entity( decomposed_uid_mapping, input_hash, input_reactome_id_values ) - + # Process preceding reactions (outputs) preceding_uids = uid_reaction_connections[ uid_reaction_connections["following_uid"] == reaction_uid ]["preceding_uid"].tolist() - + for preceding_uid in preceding_uids: # Extract output information WITH stoichiometry - output_hash = get_hash_for_reaction(reaction_id_map, preceding_uid, "output_hash") - output_uid_values, output_reactome_id_values = extract_uid_and_reactome_values( - decomposed_uid_mapping, output_hash + output_hash = get_hash_for_reaction( + reaction_id_map, preceding_uid, "output_hash" + ) + output_uid_values, output_reactome_id_values = ( + extract_uid_and_reactome_values(decomposed_uid_mapping, output_hash) ) - + # NEW: Extract 
stoichiometry for outputs output_stoichiometry = extract_stoichiometry_for_entity( decomposed_uid_mapping, output_hash, output_reactome_id_values ) - + # Assign UUIDs input_uuids = assign_uuids(input_reactome_id_values, reactome_id_to_uuid) output_uuids = assign_uuids(output_reactome_id_values, reactome_id_to_uuid) - + # Determine edge properties and_or, edge_type = determine_edge_properties(input_uid_values) - + # Add connections with stoichiometry add_pathway_connections_with_stoichiometry( - input_uuids, output_uuids, - input_stoichiometry, output_stoichiometry, - and_or, edge_type, pathway_logic_network_data + input_uuids, + output_uuids, + input_stoichiometry, + output_stoichiometry, + and_or, + edge_type, + pathway_logic_network_data, ) @@ -340,27 +364,32 @@ def append_regulators( edge_type: str, ) -> None: """Append regulatory relationships with stoichiometry.""" - + regulator_configs = [ (catalyst_map, "pos", "catalyst"), - (negative_regulator_map, "neg", "regulator"), + (negative_regulator_map, "neg", "regulator"), (positive_regulator_map, "pos", "regulator"), ] - + for map_df, pos_neg, edge_type_override in regulator_configs: for _, row in map_df.iterrows(): - stoichiometry = row.get("stoichiometry", 1) # Get stoichiometry if available - - pathway_logic_network_data.append({ - "source_id": row["uuid"], - "target_id": row["reaction_uuid"], - "pos_neg": pos_neg, - "and_or": and_or, - "edge_type": edge_type_override, - "source_stoichiometry": stoichiometry, - "target_stoichiometry": 1, # Reactions are typically 1:1 with regulators - "stoichiometry_ratio": 1.0, - }) + stoichiometry = row.get( + "stoichiometry", 1 + ) # Get stoichiometry if available + + pathway_logic_network_data.append( + { + "source_id": row["uuid"], + "target_id": row["reaction_uuid"], + "pos_neg": pos_neg, + "and_or": and_or, + "edge_type": edge_type_override, + "source_stoichiometry": stoichiometry, + "target_stoichiometry": 1, # Reactions are typically 1:1 with regulators + 
"stoichiometry_ratio": 1.0, + } + ) + def _calculate_reaction_statistics(reaction_connections: pd.DataFrame) -> None: """Calculate and print statistics about reactions without preceding events.""" @@ -369,12 +398,14 @@ def _calculate_reaction_statistics(reaction_connections: pd.DataFrame) -> None: reaction_connections["preceding_reaction_id"] ) ] - + num_reactions_without_preceding = len(reactions_without_preceding_events) num_total_reactions = len(reaction_connections) - + if num_total_reactions > 0: - percentage_without_preceding = (num_reactions_without_preceding / num_total_reactions) * 100 + percentage_without_preceding = ( + num_reactions_without_preceding / num_total_reactions + ) * 100 print("Percentage of reactions without preceding events") print(percentage_without_preceding) @@ -382,7 +413,7 @@ def _calculate_reaction_statistics(reaction_connections: pd.DataFrame) -> None: def _print_regulator_statistics( positive_regulator_map: pd.DataFrame, negative_regulator_map: pd.DataFrame, - catalyst_map: pd.DataFrame + catalyst_map: pd.DataFrame, ) -> None: """Print statistics about regulators and catalysts.""" print( @@ -393,56 +424,62 @@ def _print_regulator_statistics( def extract_stoichiometry_for_entity( - decomposed_uid_mapping: pd.DataFrame, - hash_value: str, - entity_reactome_ids: List[str] + decomposed_uid_mapping: pd.DataFrame, + hash_value: str, + entity_reactome_ids: List[str], ) -> Dict[str, int]: """Extract stoichiometry information for entities.""" stoichiometry_map = {} - + for reactome_id in entity_reactome_ids: # Get stoichiometry from decomposed mapping filtered_rows = decomposed_uid_mapping[ - (decomposed_uid_mapping["uid"] == hash_value) & - (decomposed_uid_mapping["input_or_output_reactome_id"] == reactome_id) + (decomposed_uid_mapping["uid"] == hash_value) + & (decomposed_uid_mapping["input_or_output_reactome_id"] == reactome_id) ] - + if not filtered_rows.empty: stoichiometry = filtered_rows["stoichiometry"].iloc[0] - 
stoichiometry_map[reactome_id] = stoichiometry if pd.notna(stoichiometry) else 1 + stoichiometry_map[reactome_id] = ( + stoichiometry if pd.notna(stoichiometry) else 1 + ) else: stoichiometry_map[reactome_id] = 1 - + return stoichiometry_map + def add_pathway_connections_with_stoichiometry( - input_uuids: List[str], - output_uuids: List[str], + input_uuids: List[str], + output_uuids: List[str], input_stoichiometry: Dict[str, int], output_stoichiometry: Dict[str, int], - and_or: str, + and_or: str, edge_type: str, - pathway_logic_network_data: List[Dict[str, Any]] + pathway_logic_network_data: List[Dict[str, Any]], ) -> None: """Add connections with stoichiometry information.""" - + for input_uuid in input_uuids: for output_uuid in output_uuids: # Calculate stoichiometry ratio input_stoich = input_stoichiometry.get(input_uuid, 1) output_stoich = output_stoichiometry.get(output_uuid, 1) stoich_ratio = output_stoich / input_stoich if input_stoich > 0 else 1.0 - - pathway_logic_network_data.append({ - "source_id": input_uuid, - "target_id": output_uuid, - "pos_neg": "pos", - "and_or": and_or, - "edge_type": edge_type, - "source_stoichiometry": input_stoich, - "target_stoichiometry": output_stoich, - "stoichiometry_ratio": stoich_ratio, - }) + + pathway_logic_network_data.append( + { + "source_id": input_uuid, + "target_id": output_uuid, + "pos_neg": "pos", + "and_or": and_or, + "edge_type": edge_type, + "source_stoichiometry": input_stoich, + "target_stoichiometry": output_stoich, + "stoichiometry_ratio": stoich_ratio, + } + ) + def create_pathway_logic_network( decomposed_uid_mapping: pd.DataFrame, @@ -450,7 +487,7 @@ def create_pathway_logic_network( best_matches: Any, ) -> pd.DataFrame: """Create a pathway logic network with stoichiometry support.""" - + # Enhanced column structure columns = { "source_id": pd.Series(dtype="Int64"), @@ -460,40 +497,48 @@ def create_pathway_logic_network( "edge_type": pd.Series(dtype="str"), "source_stoichiometry": 
pd.Series(dtype="Int64"), # NEW "target_stoichiometry": pd.Series(dtype="Int64"), # NEW - "stoichiometry_ratio": pd.Series(dtype="float"), # NEW + "stoichiometry_ratio": pd.Series(dtype="float"), # NEW } pathway_logic_network_data = [] - + # Extract unique reaction IDs reaction_ids = pd.unique( reaction_connections[["preceding_reaction_id", "following_reaction_id"]] .stack() .dropna() ) - + # Calculate and print statistics _calculate_reaction_statistics(reaction_connections) - + # Create mappings and connections - reaction_id_map = create_reaction_id_map(decomposed_uid_mapping, reaction_ids, best_matches) + reaction_id_map = create_reaction_id_map( + decomposed_uid_mapping, reaction_ids, best_matches + ) catalyst_map = get_catalysts_for_reaction(reaction_id_map, graph) - negative_regulator_map = get_negative_regulators_for_reaction(reaction_id_map, graph) - positive_regulator_map = get_positive_regulators_for_reaction(reaction_id_map, graph) - + negative_regulator_map = get_negative_regulators_for_reaction( + reaction_id_map, graph + ) + positive_regulator_map = get_positive_regulators_for_reaction( + reaction_id_map, graph + ) + uid_reaction_connections = create_uid_reaction_connections( reaction_id_map, best_matches, decomposed_uid_mapping ) - + reaction_uids = pd.unique( uid_reaction_connections[["preceding_uid", "following_uid"]].stack().dropna() ) - + # Print regulator statistics - _print_regulator_statistics(positive_regulator_map, negative_regulator_map, catalyst_map) - + _print_regulator_statistics( + positive_regulator_map, negative_regulator_map, catalyst_map + ) + # Process reactions and regulators reactome_id_to_uuid = {} - + for reaction_uid in reaction_uids: extract_inputs_and_outputs( reaction_uid, @@ -504,7 +549,7 @@ def create_pathway_logic_network( reactome_id_to_uuid, pathway_logic_network_data, ) - + and_or = "" edge_type = "" append_regulators( @@ -516,22 +561,25 @@ def create_pathway_logic_network( and_or, edge_type, ) - + # Create final 
DataFrame - pathway_logic_network = pd.DataFrame(pathway_logic_network_data, columns=columns.keys()) - + pathway_logic_network = pd.DataFrame( + pathway_logic_network_data, columns=columns.keys() + ) + # Find root inputs and terminal outputs root_inputs = find_root_inputs(pathway_logic_network) terminal_outputs = find_terminal_outputs(pathway_logic_network) - + print( f"root_inputs: {root_inputs}\n" f"terminal_outputs: {terminal_outputs}\n" f"pathway_logic_network: {pathway_logic_network}" ) - + return pathway_logic_network + def find_root_inputs(pathway_logic_network): root_inputs = pathway_logic_network[ (pathway_logic_network["source_id"].notnull()) diff --git a/src/neo4j_connector.py b/src/neo4j_connector.py index 502afc4..6218645 100755 --- a/src/neo4j_connector.py +++ b/src/neo4j_connector.py @@ -79,9 +79,12 @@ def get_complex_components_with_stoichiometry(entity_id: int) -> List[Dict[str, print(f"Complex {entity_id} components with stoichiometry: {result}") return result except Exception: - logger.error("Error in get_complex_components_with_stoichiometry", exc_info=True) + logger.error( + "Error in get_complex_components_with_stoichiometry", exc_info=True + ) raise + def get_set_members(entity_id: int) -> Set[int]: query_get_members_template: str = """ MATCH (entity)-[:hasCandidate|hasMember]->(member) @@ -113,7 +116,9 @@ def get_reactions(pathway_id: int, taxon_id: str) -> List[int]: raise -def get_reaction_input_output_ids_with_stoichiometry(reaction_id: int, input_or_output: str) -> List[Dict[str, Any]]: +def get_reaction_input_output_ids_with_stoichiometry( + reaction_id: int, input_or_output: str +) -> List[Dict[str, Any]]: query_template: str = """ MATCH (reaction)-[r:%s]->(io) WHERE (reaction:Reaction OR reaction:ReactionLikeEvent) AND reaction.dbId=%s @@ -128,9 +133,12 @@ def get_reaction_input_output_ids_with_stoichiometry(reaction_id: int, input_or_ print(f"Reaction {reaction_id} {input_or_output}s with stoichiometry: {result}") return result 
except Exception: - logger.error("Error in get_reaction_input_output_ids_with_stoichiometry", exc_info=True) + logger.error( + "Error in get_reaction_input_output_ids_with_stoichiometry", exc_info=True + ) raise + def get_reference_entity_id(entity_id: int) -> Union[str, None]: query_template: str = """ MATCH (reference_database:ReferenceDatabase)<-[:referenceDatabase]-(reference_entity_gene:ReferenceEntity)<-[:referenceGene]-(reference_entity:ReferenceEntity)<-[:referenceEntity]-(pe:PhysicalEntity) diff --git a/src/pathway_generator.py b/src/pathway_generator.py index 57322b4..8677679 100755 --- a/src/pathway_generator.py +++ b/src/pathway_generator.py @@ -42,9 +42,7 @@ def generate_pathway_file( [decomposed_uid_mapping, best_matches_list] = get_decomposed_uid_mapping( pathway_id, reaction_connections ) - best_matches = pd.DataFrame( - best_matches_list, columns=["incoming", "outgoing"] - ) + best_matches = pd.DataFrame(best_matches_list, columns=["incoming", "outgoing"]) decomposed_uid_mapping.to_csv(decomposed_uid_mapping_file, index=False) best_matches.to_csv(best_matches_file, index=False) diff --git a/src/reaction_generator.py b/src/reaction_generator.py index 5bfd287..cd75f92 100755 --- a/src/reaction_generator.py +++ b/src/reaction_generator.py @@ -39,7 +39,7 @@ "strategy": "use_canonical", "canonical_representative": "68524", "description": "Ubiquitin isoforms - all serve identical protein degradation signaling function", - "biological_rationale": "Multiple ubiquitin genes produce identical/near-identical proteins" + "biological_rationale": "Multiple ubiquitin genes produce identical/near-identical proteins", }, } @@ -83,13 +83,11 @@ def is_functionally_equivalent_entity(entity_id: str) -> Optional[Dict[str, Any] def apply_functional_equivalence_strategy( - entity_id: str, - functional_group: Dict[str, Any], - original_result: Set[str] + entity_id: str, functional_group: Dict[str, Any], original_result: Set[str] ) -> Set[str]: """Apply functional 
equivalence strategy to entity decomposition.""" strategy = functional_group.get("strategy", "use_canonical") - + if strategy == "use_canonical": canonical_rep = functional_group.get("canonical_representative", entity_id) return {canonical_rep} @@ -98,9 +96,9 @@ def apply_functional_equivalence_strategy( def get_broken_apart_ids( - broken_apart_members: list[set[str]], + broken_apart_members: list[set[str]], reactome_id: ReactomeID, - stoichiometry_map: Dict[str, int] = None # NEW PARAMETER + stoichiometry_map: Dict[str, int] = None, # NEW PARAMETER ) -> Set[UID]: """Get broken apart IDs with stoichiometry support.""" global decomposed_uid_mapping @@ -119,9 +117,11 @@ def get_broken_apart_ids( iterproduct_components = list(itertools.product(*new_broken_apart_members)) total_combinations = len(iterproduct_components) - + if total_combinations > 1000: - logger.warning(f"Large combination set for reaction {reactome_id}: {total_combinations} combinations") + logger.warning( + f"Large combination set for reaction {reactome_id}: {total_combinations} combinations" + ) iterproduct_components_as_sets = [ set(map(str, item)) for item in iterproduct_components @@ -133,7 +133,7 @@ def get_broken_apart_ids( uid = str(uuid.uuid4()) rows: List[DataFrameRow] = [] row: DataFrameRow - + for member in broken_apart_members: if is_valid_uuid(member): component_ids = decomposed_uid_mapping.loc[ @@ -142,7 +142,7 @@ def get_broken_apart_ids( for component_id in component_ids: # NEW: Add stoichiometry information stoichiometry = stoichiometry_map.get(component_id, 1) - + row = { "uid": uid, "component_id": component_id, @@ -160,7 +160,7 @@ def get_broken_apart_ids( else: # NEW: Add stoichiometry information stoichiometry = stoichiometry_map.get(member, 1) - + row = { "uid": uid, "component_id": member, @@ -183,9 +183,9 @@ def get_broken_apart_ids( def get_uids_for_iterproduct_components( - iterproduct_components: List[Set[ComponentID]], + iterproduct_components: List[Set[ComponentID]], 
reactome_id: ReactomeID, - stoichiometry_map: Dict[str, int] = None # NEW PARAMETER + stoichiometry_map: Dict[str, int] = None, # NEW PARAMETER ) -> Set[UID]: """Get UID for iterproduct components with stoichiometry support.""" global decomposed_uid_mapping @@ -197,7 +197,7 @@ def get_uids_for_iterproduct_components( for component in iterproduct_components: component_to_input_or_output: Dict[ComponentID, InputOutputID] = {} component_stoichiometries: Dict[ComponentID, int] = {} # NEW - + for item in component: if is_valid_uuid(item): selected_rows = decomposed_uid_mapping.loc[ @@ -207,15 +207,19 @@ def get_uids_for_iterproduct_components( component_id = selected_row["component_id"] component_to_input_or_output[component_id] = item # NEW: Get stoichiometry from existing data - component_stoichiometries[component_id] = selected_row.get("stoichiometry", 1) + component_stoichiometries[component_id] = selected_row.get( + "stoichiometry", 1 + ) else: component_to_input_or_output[item] = item # NEW: Get stoichiometry from the map component_stoichiometries[item] = stoichiometry_map.get(item, 1) # MODIFIED: Include stoichiometry in UID calculation - uid_data = [(comp, io, component_stoichiometries.get(comp, 1)) - for comp, io in sorted(component_to_input_or_output.items())] + uid_data = [ + (comp, io, component_stoichiometries.get(comp, 1)) + for comp, io in sorted(component_to_input_or_output.items()) + ] uid = hashlib.sha256(str(uid_data).encode()).hexdigest() rows: List[DataFrameRow] = [] @@ -226,10 +230,10 @@ def get_uids_for_iterproduct_components( input_or_output_reactome_id = ( input_or_output_id if not is_valid_uuid(input_or_output_id) else None ) - + # NEW: Get stoichiometry information stoichiometry = component_stoichiometries.get(component_id, 1) - + row: DataFrameRow = { "uid": uid, "component_id": component_id, @@ -261,7 +265,9 @@ def break_apart_entity(entity_id: int) -> Set[str]: functional_group = is_functionally_equivalent_entity(entity_str) if 
functional_group: original_result = break_apart_entity_standard(entity_id) - return apply_functional_equivalence_strategy(entity_str, functional_group, original_result) + return apply_functional_equivalence_strategy( + entity_str, functional_group, original_result + ) # Use original decomposition logic return break_apart_entity_standard(entity_id) @@ -292,7 +298,7 @@ def break_apart_entity_standard(entity_id: int) -> Set[str]: contains_thing = contains_reference_gene_product_molecule_or_isoform(entity_id) if contains_thing: return set([str(entity_id)]) - + member_ids = get_set_members(entity_id) member_list: List[str] = [] @@ -307,17 +313,17 @@ def break_apart_entity_standard(entity_id: int) -> Set[str]: elif "Complex" in labels: broken_apart_members: List[Set[str]] = [] - + # MODIFIED: Get components with stoichiometry component_data = get_complex_components_with_stoichiometry(entity_id) print(f"Complex {entity_id} component data: {component_data}") - + component_stoichiometry = {} member_ids = [] - + for component_info in component_data: - component_id = component_info['component_id'] - stoichiometry = component_info['stoichiometry'] + component_id = component_info["component_id"] + stoichiometry = component_info["stoichiometry"] component_stoichiometry[str(component_id)] = stoichiometry member_ids.append(component_id) @@ -357,28 +363,36 @@ def decompose_by_reactions(reaction_ids: List[int]) -> List[Any]: all_best_matches = [] for reaction_id in reaction_ids: print(f"Processing reaction {reaction_id}") - + # MODIFIED: Get inputs with stoichiometry - input_data = get_reaction_input_output_ids_with_stoichiometry(reaction_id, "input") - input_ids = [item['entity_id'] for item in input_data] - input_stoichiometry = {str(item['entity_id']): item['stoichiometry'] for item in input_data} - + input_data = get_reaction_input_output_ids_with_stoichiometry( + reaction_id, "input" + ) + input_ids = [item["entity_id"] for item in input_data] + input_stoichiometry = { + 
str(item["entity_id"]): item["stoichiometry"] for item in input_data + } + print(f"Reaction {reaction_id} inputs: {input_ids}") print(f"Input stoichiometry: {input_stoichiometry}") - + broken_apart_input_id = [break_apart_entity(input_id) for input_id in input_ids] input_combinations = get_broken_apart_ids( broken_apart_input_id, str(reaction_id), input_stoichiometry ) # MODIFIED: Get outputs with stoichiometry - output_data = get_reaction_input_output_ids_with_stoichiometry(reaction_id, "output") - output_ids = [item['entity_id'] for item in output_data] - output_stoichiometry = {str(item['entity_id']): item['stoichiometry'] for item in output_data} - + output_data = get_reaction_input_output_ids_with_stoichiometry( + reaction_id, "output" + ) + output_ids = [item["entity_id"] for item in output_data] + output_stoichiometry = { + str(item["entity_id"]): item["stoichiometry"] for item in output_data + } + print(f"Reaction {reaction_id} outputs: {output_ids}") print(f"Output stoichiometry: {output_stoichiometry}") - + broken_apart_output_id = [ break_apart_entity(output_id) for output_id in output_ids ] @@ -414,14 +428,14 @@ def get_decomposed_uid_mapping( reaction_ids = reaction_ids[~pd.isna(reaction_ids)] # removing NA value from list reaction_ids = reaction_ids.astype(int).tolist() # converting to integer - + print(f"Processing {len(reaction_ids)} reactions for pathway {pathway_id}") - + best_matches = decompose_by_reactions(list(reaction_ids)) - + print(f"Final decomposed_uid_mapping shape: {decomposed_uid_mapping.shape}") print(f"Stoichiometry column summary:") - if 'stoichiometry' in decomposed_uid_mapping.columns: - print(decomposed_uid_mapping['stoichiometry'].value_counts()) + if "stoichiometry" in decomposed_uid_mapping.columns: + print(decomposed_uid_mapping["stoichiometry"].value_counts()) return (decomposed_uid_mapping, best_matches) From 53632b253c4341ccb573ec2b37c024232ae5c7f6 Mon Sep 17 00:00:00 2001 From: Thomas Barbazuk Date: Mon, 4 Aug 2025 
18:52:35 +0200
Subject: [PATCH 4/4] added in stoichiometry. Runs well, more changes
 forthcoming

---
 src/reaction_generator.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/reaction_generator.py b/src/reaction_generator.py
index cd75f92..b3c428e 100755
--- a/src/reaction_generator.py
+++ b/src/reaction_generator.py
@@ -187,7 +187,7 @@ def get_uids_for_iterproduct_components(
     reaction_id: ReactomeID,
     stoichiometry_map: Dict[str, int] = None,  # NEW PARAMETER
 ) -> Set[UID]:
-    """Get UID for iterproduct components with stoichiometry support."""
+    """Get UID for iterproduct components."""
     global decomposed_uid_mapping
 
     if stoichiometry_map is None:
@@ -434,7 +434,7 @@ def get_decomposed_uid_mapping(
 
     best_matches = decompose_by_reactions(list(reaction_ids))
 
     print(f"Final decomposed_uid_mapping shape: {decomposed_uid_mapping.shape}")
-    print(f"Stoichiometry column summary:")
+    print("Stoichiometry column summary:")
     if "stoichiometry" in decomposed_uid_mapping.columns:
         print(decomposed_uid_mapping["stoichiometry"].value_counts())