diff --git a/scripts/featured b/scripts/featured index 24cfa77..1d443d2 100644 --- a/scripts/featured +++ b/scripts/featured @@ -266,8 +266,12 @@ class FeatureHandler(object): return False if not enable and not disable: - syslog.syslog(syslog.LOG_ERR, "Unexpected state value '{}' for feature {}" - .format(feature.state, feature.name)) + # state=None means the FEATURE entry was deleted by a concurrent uninstall + # while this notification was queued — not an error. Any other non-None + # value here is a genuinely unexpected state and should be flagged. + if feature.state is not None: + syslog.syslog(syslog.LOG_ERR, "Unexpected state value '{}' for feature {}" + .format(feature.state, feature.name)) return False if feature.delayed and not self.is_delayed_enabled: @@ -323,6 +327,12 @@ class FeatureHandler(object): return self._config_db.mod_entry(FEATURE_TBL, feature_config.name, {'has_per_asic_scope': str(feature_config.has_per_asic_scope)}) self._config_db.mod_entry(FEATURE_TBL, feature_config.name, {'has_global_scope': str(feature_config.has_global_scope)}) + # If the entry was deleted by a concurrent deregister, mod_entry recreates it + # without a 'state' field. Remove the partial entry to avoid a ghost record. + post_entry = self._config_db.get_entry(FEATURE_TBL, feature_config.name) + if post_entry and 'state' not in post_entry: + self._config_db.set_entry(FEATURE_TBL, feature_config.name, None) + return # sync has_per_asic_scope to CONFIG_DB in namespaces in multi-asic platform for ns, db in self.ns_cfg_db.items(): @@ -524,7 +534,17 @@ class FeatureHandler(object): def resync_feature_state(self, feature): current_entry = self._config_db.get_entry('FEATURE', feature.name) - current_feature_state = current_entry.get('state') if current_entry else None + + # Entry was deleted by a concurrent deregister — do not recreate it. + if not current_entry: + return + + # Partial entry left by sync_feature_scope racing with deregister — remove it. + if 'state' not in current_entry: + self._config_db.set_entry('FEATURE', feature.name, None) + return + + current_feature_state = current_entry.get('state') if feature.state == current_feature_state: return diff --git a/tests/featured/featured_test.py b/tests/featured/featured_test.py index a563075..b2444f1 100644 --- a/tests/featured/featured_test.py +++ b/tests/featured/featured_test.py @@ -282,7 +282,8 @@ def test_feature_resync(self): } mock_db.get_entry.return_value = None feature_handler.sync_state_field(feature_table) - mock_db.mod_entry.assert_called_with('FEATURE', 'sflow', {'state': 'enabled'}) + # Missing FEATURE row: resync_feature_state skips mod_entry (avoid recreating a row removed concurrently). + mock_db.mod_entry.assert_not_called() mock_db.mod_entry.reset_mock() feature_handler = featured.FeatureHandler(mock_db, mock_feature_state_table, {}, False) @@ -321,7 +322,46 @@ def test_feature_resync(self): } feature_handler.sync_state_field(feature_table) mock_db.mod_entry.assert_called_with('FEATURE', 'sflow', {'state': 'enabled'}) - + mock_db.mod_entry.reset_mock() + + # Partial entry (no 'state' key): left behind by sync_feature_scope racing with + # deregister. resync_feature_state must delete it rather than completing it. + feature_handler = featured.FeatureHandler(mock_db, mock_feature_state_table, {}, False) + mock_db.get_entry.return_value = { + 'has_per_asic_scope': 'False', + 'has_global_scope': 'True', + } + mock_db.set_entry = mock.MagicMock() + feature_handler.sync_state_field(feature_table) + mock_db.set_entry.assert_called_with('FEATURE', 'sflow', None) + mock_db.mod_entry.assert_not_called() + + def test_sync_feature_scope_toctou(self): + """sync_feature_scope must delete a partial entry it accidentally creates when + the FEATURE row is deleted by a concurrent deregister between the mod_entry + calls and the post-write existence check (TOCTOU).""" + mock_db = mock.MagicMock() + mock_feature_state_table = mock.MagicMock() + + feature_handler = featured.FeatureHandler(mock_db, mock_feature_state_table, {}, False) + feature = featured.Feature('sflow', { + 'state': 'disabled', + 'has_global_scope': 'True', + 'has_per_asic_scope': 'False', + 'delayed': 'False', + }) + + # Post-write check sees a partial entry (no 'state' key): the FEATURE row was + # deleted by deregister and then recreated by mod_entry during the race + # window, leaving only the scope fields. + mock_db.get_entry.return_value = { + 'has_per_asic_scope': 'False', + 'has_global_scope': 'True', + } + + feature_handler.sync_feature_scope(feature) + mock_db.set_entry.assert_called_with(featured.FEATURE_TBL, 'sflow', None) + def test_port_init_done_twice(self): """There could be multiple "PortInitDone" event in case of swss restart(either due to crash or due to manual operation). swss