Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/core_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ jobs:

- run: uv run pytest test/test_skim_name_conflicts.py
- run: uv run pytest test/random_seed/test_random_seed.py
- run: uv run pytest test/skip_failed_choices/test_skip_failed_choices.py

builtin_regional_models:
needs: foundation
Expand Down
42 changes: 42 additions & 0 deletions activitysim/abm/models/location_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,11 @@ def location_sample(
):
# FIXME - MEMORY HACK - only include columns actually used in spec
chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS
# Drop this when PR #1017 is merged
if ("household_id" not in chooser_columns) and (
"household_id" in persons_merged.columns
):
chooser_columns = chooser_columns + ["household_id"]
choosers = persons_merged[chooser_columns]

# create wrapper with keys for this lookup - in this case there is a home_zone_id in the choosers
Expand Down Expand Up @@ -390,6 +395,11 @@ def location_presample(
# FIXME maybe we should add it for multi-zone (from maz_taz) if missing?
chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS
chooser_columns = [HOME_TAZ if c == HOME_MAZ else c for c in chooser_columns]
# Drop this when PR #1017 is merged
if ("household_id" not in chooser_columns) and (
"household_id" in persons_merged.columns
):
chooser_columns = chooser_columns + ["household_id"]
choosers = persons_merged[chooser_columns]

# create wrapper with keys for this lookup - in this case there is a HOME_TAZ in the choosers
Expand Down Expand Up @@ -620,6 +630,11 @@ def run_location_simulate(

# FIXME - MEMORY HACK - only include columns actually used in spec
chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS
# Drop this when PR #1017 is merged
if ("household_id" not in chooser_columns) and (
"household_id" in persons_merged.columns
):
chooser_columns = chooser_columns + ["household_id"]
choosers = persons_merged[chooser_columns]

alt_dest_col_name = model_settings.ALT_DEST_COL_NAME
Expand Down Expand Up @@ -1072,6 +1087,33 @@ def iterate_location_choice(
else:
choices_df = choices_df_

if (
state.settings.skip_failed_choices
and state.get("num_skipped_households", 0) > 0
):
# drop choices that belong to the failed households: state.skipped_household_ids
# so that their choices are not considered in shadow price calculations
# first append household_id to choices_df
choices_df = choices_df.merge(
persons_merged_df[["household_id"]],
left_index=True,
right_index=True,
how="left",
)
if len(choices_df) > 0:
# Get all household IDs from all trace_labels in the dictionary
import itertools

skipped_household_ids_dict = state.get("skipped_household_ids", dict())
all_skipped_hh_ids = set(
itertools.chain.from_iterable(skipped_household_ids_dict.values())
)

choices_df = choices_df[
~choices_df["household_id"].isin(all_skipped_hh_ids)
]
choices_df = choices_df.drop(columns=["household_id"])

spc.set_choices(
choices=choices_df["choice"],
segment_ids=persons_merged_df[chooser_segment_column].reindex(
Expand Down
5 changes: 5 additions & 0 deletions activitysim/abm/models/school_escorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,11 @@ def school_escorting(
# reduce memory by limiting columns if selected columns are supplied
chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS
if chooser_columns is not None:
# Drop this when PR #1017 is merged
if ("household_id" not in chooser_columns) and (
"household_id" in choosers.columns
):
chooser_columns = chooser_columns + ["household_id"]
chooser_columns = chooser_columns + participant_columns
choosers = choosers[chooser_columns]

Expand Down
28 changes: 28 additions & 0 deletions activitysim/abm/models/trip_matrices.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,34 @@ def write_trip_matrices(

trips_df = annotate_trips(state, trips, network_los, model_settings)

# This block adjusts household sample rate column to account for skipped households.
# Note: the `HH_EXPANSION_WEIGHT_COL` is pointing to the `sample_rate` column in the households table.
# Based on the calculation in write_matrices() function, the sample_rate is used to calculate the expansion weight as 1 / sample_rate.
# A sample_rate of 0.01 means the sample household should be expanded 1/0.01 = 100 times in the actual population households.
# In simulation, the `sample_rate` is calculated and added to the synthetic households
# based on household_sample_size / total_household_count, and therefore is the same for all households.
# In estimation, the `sample_rate` may vary by household, but weights are not used in estimation, and write_trip_matrices is not called during estimation.
# But we still try to cover both cases (when rates are the same vs when they vary) here for consistency.
hh_weight_col = model_settings.HH_EXPANSION_WEIGHT_COL
if state.get("num_skipped_households", 0) > 0:
logger.info(
f"Adjusting household sample rate in {hh_weight_col} to account for {state.get('num_skipped_households', 0)} skipped households."
)
# adjust the hh sample rates to account for skipped households
# first get the total expansion weight of the skipped households, which will be the sum of inverse of their sample rates
skipped_household_weights = (
1 / state.get_dataframe("households_skipped")[hh_weight_col]
).sum()
# next get the total expansion weight of the remaining households
remaining_household_weights = (
1 / state.get_dataframe("households")[hh_weight_col]
).sum()
# the adjustment factor is the remaining household weight / (remaining household weight + skipped household weight)
adjustment_factor = remaining_household_weights / (
remaining_household_weights + skipped_household_weights
)
trips_df[hh_weight_col] = trips_df[hh_weight_col] * adjustment_factor

if model_settings.SAVE_TRIPS_TABLE:
state.add_table("trips", trips_df)

Expand Down
17 changes: 16 additions & 1 deletion activitysim/abm/models/trip_mode_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,20 @@ def trip_mode_choice(
"trip_mode_choice choices", trips_df[mode_column_name], value_counts=True
)

assert not trips_df[mode_column_name].isnull().any()
# if we're skipping failed choices, the trip modes for failed simulations will be null
if state.settings.skip_failed_choices:
# Get all household IDs from all trace_labels in the dictionary - more efficient flattening
import itertools

skipped_household_ids_dict = state.get("skipped_household_ids", dict())
all_skipped_hh_ids = set(
itertools.chain.from_iterable(skipped_household_ids_dict.values())
)

mask_skipped = trips_df["household_id"].isin(all_skipped_hh_ids)
assert not trips_df.loc[~mask_skipped, mode_column_name].isnull().any()
else:
assert not trips_df[mode_column_name].isnull().any()

state.add_table("trips", trips_df)

Expand All @@ -382,6 +395,8 @@ def trip_mode_choice(
# need to update locals_dict to access skims that are the same .shape as trips table
locals_dict = {}
locals_dict.update(constants)
if state.settings.skip_failed_choices:
trips_merged = trips_merged.loc[~mask_skipped]
simulate.set_skim_wrapper_targets(trips_merged, skims)
locals_dict.update(skims)
locals_dict["timeframe"] = "trip"
Expand Down
35 changes: 35 additions & 0 deletions activitysim/abm/models/util/school_escort_tours_trips.py
Original file line number Diff line number Diff line change
Expand Up @@ -1043,6 +1043,41 @@ def force_escortee_trip_modes_to_match_chauffeur(state: workflow.State, trips):
f"Changed {diff.sum()} trip modes of school escortees to match their chauffeur"
)

# trip_mode can be na if the run allows skipping failed choices and the trip mode choice has failed
# in that case we can't assert that all trip modes are filled
# instead, we throw a warning about how many are missing, and return early
if state.settings.skip_failed_choices:
missing_count = trips.trip_mode.isna().sum()
if missing_count > 0:
# check if the missing trip modes are all because of simulation failures
# i.e., they are from households that are in the skipped_household_ids set
import itertools

skipped_household_ids_dict = state.get("skipped_household_ids", dict())
skipped_household_ids = set(
itertools.chain.from_iterable(skipped_household_ids_dict.values())
)
missing_household_ids = set(
trips[trips.trip_mode.isna()]["household_id"].unique()
)
# log a warning about the missing trip modes for skipped households
missing_count_due_to_sim_fail = len(
trips[
trips.trip_mode.isna()
& trips.household_id.isin(skipped_household_ids)
]
)
logger.warning(
f"Missing trip mode for {missing_count_due_to_sim_fail} trips due to simulation failures in trip mode choice, "
f"these records and their corresponding households are being skipped: {missing_household_ids}"
)
# throw assertion error if there are missing trip modes for households that were not skipped
assert missing_household_ids.issubset(skipped_household_ids), (
f"Missing trip modes for households that were not skipped: {missing_household_ids - skipped_household_ids}. "
f"Missing trip modes for: {trips[trips.trip_mode.isna() & ~trips.household_id.isin(skipped_household_ids)]}"
)
return trips

assert (
~trips.trip_mode.isna()
).all(), f"Missing trip mode for {trips[trips.trip_mode.isna()]}"
Expand Down
10 changes: 10 additions & 0 deletions activitysim/abm/models/util/tour_destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,11 @@ def run_destination_sample(
# if special person id is passed
chooser_id_column = model_settings.CHOOSER_ID_COLUMN

# Drop this when PR #1017 is merged
if ("household_id" not in chooser_columns) and (
"household_id" in persons_merged.columns
):
chooser_columns = chooser_columns + ["household_id"]
persons_merged = persons_merged[
[c for c in persons_merged.columns if c in chooser_columns]
]
Expand Down Expand Up @@ -799,6 +804,11 @@ def run_destination_simulate(
# if special person id is passed
chooser_id_column = model_settings.CHOOSER_ID_COLUMN

# Drop this when PR #1017 is merged
if ("household_id" not in chooser_columns) and (
"household_id" in persons_merged.columns
):
chooser_columns = chooser_columns + ["household_id"]
persons_merged = persons_merged[
[c for c in persons_merged.columns if c in chooser_columns]
]
Expand Down
6 changes: 6 additions & 0 deletions activitysim/abm/models/util/tour_od.py
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,9 @@ def run_od_sample(
choosers = tours
# FIXME - MEMORY HACK - only include columns actually used in spec
chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS
# Drop this when PR #1017 is merged
if ("household_id" not in chooser_columns) and ("household_id" in choosers.columns):
chooser_columns = chooser_columns + ["household_id"]
choosers = choosers[chooser_columns]

# interaction_sample requires that choosers.index.is_monotonic_increasing
Expand Down Expand Up @@ -951,6 +954,9 @@ def run_od_simulate(

# FIXME - MEMORY HACK - only include columns actually used in spec
chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS
# Drop this when PR #1017 is merged
if ("household_id" not in chooser_columns) and ("household_id" in choosers.columns):
chooser_columns = chooser_columns + ["household_id"]
choosers = choosers[chooser_columns]

# interaction_sample requires that choosers.index.is_monotonic_increasing
Expand Down
6 changes: 6 additions & 0 deletions activitysim/abm/models/util/tour_scheduling.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ def run_tour_scheduling(
c for c in model_columns if c not in logsum_columns
]

# Drop this when PR #1017 is merged
if ("household_id" not in chooser_columns) and (
"household_id" in persons_merged.columns
):
chooser_columns = chooser_columns + ["household_id"]

persons_merged = expressions.filter_chooser_columns(persons_merged, chooser_columns)

timetable = state.get_injectable("timetable")
Expand Down
6 changes: 6 additions & 0 deletions activitysim/abm/tables/households.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,12 @@ def households(state: workflow.State) -> pd.DataFrame:

# replace table function with dataframe
state.add_table("households", df)
if state.settings.skip_failed_choices:
logger.info(
"Note: 'skip_failed_choices' is enabled; households may be skipped when simulation fails."
)
# initialize skipped households table as empty and same columns as households
state.add_table("households_skipped", df.iloc[0:0])

state.get_rn_generator().add_channel("households", df)

Expand Down
18 changes: 18 additions & 0 deletions activitysim/cli/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,24 @@ def run(args):
if memory_sidecar_process:
memory_sidecar_process.stop()

# print out a summary of households skipped due to failed choices
# we want to see number of unique households skipped by trace_label
if state.settings.skip_failed_choices:
skipped_household_ids_dict = state.get("skipped_household_ids", dict())
for trace_label, hh_id_set in skipped_household_ids_dict.items():
logger.warning(
f"Number of unique households skipped for trace_label '{trace_label}': {len(hh_id_set)}. They are: {sorted(hh_id_set)}"
)
# also log the total number of unique households skipped across all trace_labels
import itertools

all_skipped_hh_ids = set(
itertools.chain.from_iterable(skipped_household_ids_dict.values())
)
logger.warning(
f"Total number of unique households skipped across all trace_labels: {len(all_skipped_hh_ids)}."
)

if state.settings.expression_profile:
# generate a summary of slower expression evaluation times
# across all models and write to a file
Expand Down
18 changes: 17 additions & 1 deletion activitysim/core/configuration/top.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,11 +776,27 @@ def _check_store_skims_in_shm(self):

check_model_settings: bool = True
"""
run checks to validate that YAML settings files are loadable and spec and coefficent csv can be resolved.
run checks to validate that YAML settings files are loadable and spec and coefficient csv can be resolved.

should catch many common errors early, including missing required configurations or specified coefficient labels without defined values.
"""

skip_failed_choices: bool = True
"""
Skip households that cause errors during processing instead of failing the model run.

.. versionadded:: 1.6
"""

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need additional setting[s] to set thresholds for how many skips are OK and when it's too many and should be an error.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added fraction_of_failed_choices_allowed as a global parameter and implemented the checking in core.workflow.state.update_table()

fraction_of_failed_choices_allowed: float = 0.1
"""
Threshold for the fraction of households that can be skipped before failing the model run,
used in conjunction with `skip_failed_choices`.
We want to skip problems when they are rare, but fail the run if they are common.

.. versionadded:: 1.6
"""

other_settings: dict[str, Any] = None

def _get_attr(self, attr):
Expand Down
5 changes: 5 additions & 0 deletions activitysim/core/interaction_sample_simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,11 @@ def _interaction_sample_simulate(
# that is, we want the index value of the row that is offset by <position> rows into the
# tranche of this choosers alternatives created by cross join of alternatives and choosers

# when skip_failed_choices is enabled, the position may be -99 for failed choices, which gets dropped eventually
# here we just need to clip to zero to avoid getting the wrong index in the take() below
if state.settings.skip_failed_choices:
positions = positions.clip(lower=0)

# resulting pandas Int64Index has one element per chooser row and is in same order as choosers
choices = alternatives[choice_column].take(positions + first_row_offsets)

Expand Down
Loading