Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions scripts/featured
Original file line number Diff line number Diff line change
Expand Up @@ -266,8 +266,12 @@ class FeatureHandler(object):
return False

if not enable and not disable:
syslog.syslog(syslog.LOG_ERR, "Unexpected state value '{}' for feature {}"
.format(feature.state, feature.name))
# state=None means the FEATURE entry was deleted by a concurrent uninstall
# while this notification was queued — not an error. Any other non-None
# value here is a genuinely unexpected state and should be flagged.
if feature.state is not None:
syslog.syslog(syslog.LOG_ERR, "Unexpected state value '{}' for feature {}"
.format(feature.state, feature.name))
return False

if feature.delayed and not self.is_delayed_enabled:
Expand Down Expand Up @@ -323,6 +327,12 @@ class FeatureHandler(object):
return
self._config_db.mod_entry(FEATURE_TBL, feature_config.name, {'has_per_asic_scope': str(feature_config.has_per_asic_scope)})
self._config_db.mod_entry(FEATURE_TBL, feature_config.name, {'has_global_scope': str(feature_config.has_global_scope)})
# If the entry was deleted by a concurrent deregister, mod_entry recreates it
# without a 'state' field. Remove the partial entry to avoid a ghost record.
post_entry = self._config_db.get_entry(FEATURE_TBL, feature_config.name)
if post_entry and 'state' not in post_entry:
self._config_db.set_entry(FEATURE_TBL, feature_config.name, None)
return

# sync has_per_asic_scope to CONFIG_DB in namespaces in multi-asic platform
for ns, db in self.ns_cfg_db.items():
Expand Down Expand Up @@ -524,7 +534,17 @@ class FeatureHandler(object):

def resync_feature_state(self, feature):
current_entry = self._config_db.get_entry('FEATURE', feature.name)
current_feature_state = current_entry.get('state') if current_entry else None

# Entry was deleted by a concurrent deregister — do not recreate it.
if not current_entry:
return

# Partial entry left by sync_feature_scope racing with deregister — remove it.
if 'state' not in current_entry:
self._config_db.set_entry('FEATURE', feature.name, None)
return

current_feature_state = current_entry.get('state')

if feature.state == current_feature_state:
return
Expand Down
44 changes: 42 additions & 2 deletions tests/featured/featured_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,8 @@ def test_feature_resync(self):
}
mock_db.get_entry.return_value = None
feature_handler.sync_state_field(feature_table)
mock_db.mod_entry.assert_called_with('FEATURE', 'sflow', {'state': 'enabled'})
# Missing FEATURE row: resync_feature_state skips mod_entry (avoid recreating a row removed concurrently).
mock_db.mod_entry.assert_not_called()
mock_db.mod_entry.reset_mock()

feature_handler = featured.FeatureHandler(mock_db, mock_feature_state_table, {}, False)
Expand Down Expand Up @@ -321,7 +322,46 @@ def test_feature_resync(self):
}
feature_handler.sync_state_field(feature_table)
mock_db.mod_entry.assert_called_with('FEATURE', 'sflow', {'state': 'enabled'})

mock_db.mod_entry.reset_mock()

# Partial entry (no 'state' key): left behind by sync_feature_scope racing with
# deregister. resync_feature_state must delete it rather than completing it.
feature_handler = featured.FeatureHandler(mock_db, mock_feature_state_table, {}, False)
mock_db.get_entry.return_value = {
'has_per_asic_scope': 'False',
'has_global_scope': 'True',
}
mock_db.set_entry = mock.MagicMock()
feature_handler.sync_state_field(feature_table)
mock_db.set_entry.assert_called_with('FEATURE', 'sflow', None)
mock_db.mod_entry.assert_not_called()

def test_sync_feature_scope_toctou(self):
"""sync_feature_scope must delete a partial entry it accidentally creates when
the FEATURE row is deleted by a concurrent deregister between the mod_entry
calls and the post-write existence check (TOCTOU)."""
mock_db = mock.MagicMock()
mock_feature_state_table = mock.MagicMock()

feature_handler = featured.FeatureHandler(mock_db, mock_feature_state_table, {}, False)
feature = featured.Feature('sflow', {
'state': 'disabled',
'has_global_scope': 'True',
'has_per_asic_scope': 'False',
'delayed': 'False',
})

# Post-write check sees a partial entry (no 'state' key): the FEATURE row was
# deleted by deregister and then recreated by mod_entry during the race
# window, leaving only the scope fields.
mock_db.get_entry.return_value = {
'has_per_asic_scope': 'False',
'has_global_scope': 'True',
}

feature_handler.sync_feature_scope(feature)
mock_db.set_entry.assert_called_with(featured.FEATURE_TBL, 'sflow', None)

def test_port_init_done_twice(self):
"""There could be multiple "PortInitDone" event in case of swss
restart(either due to crash or due to manual operation). swss
Expand Down