Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions changes/394.fixed
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fixed Junos reboots not being detected when waiting for the device to reload.
Comment thread
Defiantearth marked this conversation as resolved.
Increased the default Junos reboot wait timeout from 1 hour to 2 hours.
93 changes: 70 additions & 23 deletions pyntc/devices/jnpr_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,18 +228,53 @@ def _uptime_to_string(self, uptime_full_string):
days, hours, minutes, seconds = self._uptime_components(uptime_full_string)
return f"{days:02d}:{hours:02d}:{minutes:02d}:{seconds:02d}"

def _wait_for_device_reboot(self, timeout=3600):
def _wait_for_device_reboot(self, original_uptime, timeout=7200):
"""Block until the device reboots and accepts a fresh connection.

Drops the existing NETCONF session and polls for the device to come back.
The reboot is considered complete when a new connection succeeds and the
device reports an uptime lower than ``original_uptime`` (i.e., it has
booted since the reboot was issued).

The pre-reboot session must be discarded first: once the device restarts,
PyEZ still reports it as connected even though the transport is dead, so it
is closed here to force each probe to establish a fresh connection.

Args:
original_uptime (int): Device uptime in seconds captured before the reboot.
timeout (int, optional): Max seconds to wait for the device to return. Defaults to 2 hours.
"""
start = time.time()
disconnected = False

# Drop the pre-reboot NETCONF session so subsequent probes can't read from
# a stale connection PyEZ still reports as connected.
try:
self.close()
except Exception as close_exc: # pylint: disable=broad-exception-caught
log.debug("Host %s: Pre-reboot disconnect raised %s (ignored).", self.host, close_exc)

while time.time() - start < timeout:
if disconnected:
try:
self.open()
try:
self.open()
self._uptime = None
current_uptime = self.uptime
if current_uptime is not None and current_uptime < original_uptime:
log.info(
"Host %s: Device rebooted (uptime %ss < pre-reboot %ss).",
self.host,
current_uptime,
original_uptime,
)
return
except: # noqa E722 # nosec # pylint: disable=bare-except
pass
elif not self.connected:
disconnected = True
log.debug(
"Host %s: Reachable but uptime %ss >= pre-reboot %ss; still waiting.",
self.host,
current_uptime,
original_uptime,
)
except Exception as exc: # pylint: disable=broad-exception-caught
log.debug("Host %s: Reboot probe failed (%s); will retry.", self.host, exc)
self.native.connected = False
time.sleep(10)

raise RebootTimeoutError(hostname=self.hostname, wait_time=timeout)
Expand Down Expand Up @@ -318,12 +353,14 @@ def uptime(self):
Returns:
(int): Device uptime in seconds.
"""
try:
native_uptime_string = self.native.facts["RE0"]["up_time"]
except (AttributeError, TypeError):
native_uptime_string = None

if self._uptime is None:
try:
# Bust PyEZ's cached facts so a cold cache always reflects the live device.
self.native.facts_refresh(keys="RE0")
native_uptime_string = self.native.facts["RE0"]["up_time"]
except (AttributeError, TypeError, KeyError):
native_uptime_string = None

Comment thread
Defiantearth marked this conversation as resolved.
if native_uptime_string is not None:
self._uptime = self._uptime_to_seconds(native_uptime_string)

Expand All @@ -337,13 +374,16 @@ def uptime_string(self):
Returns:
(str): Device uptime.
"""
try:
native_uptime_string = self.native.facts["RE0"]["up_time"]
except (AttributeError, TypeError):
native_uptime_string = None

if self._uptime_string is None:
self._uptime_string = self._uptime_to_string(native_uptime_string)
try:
# Bust PyEZ's cached facts so a cold cache always reflects the live device.
self.native.facts_refresh(keys="RE0")
native_uptime_string = self.native.facts["RE0"]["up_time"]
except (AttributeError, TypeError, KeyError):
native_uptime_string = None

if native_uptime_string is not None:
self._uptime_string = self._uptime_to_string(native_uptime_string)

return self._uptime_string

Expand Down Expand Up @@ -505,13 +545,13 @@ def open(self):
if not self.connected:
self.native.open()

def reboot(self, wait_for_reload=False, timeout=3600, confirm=None):
def reboot(self, wait_for_reload=False, timeout=7200, confirm=None):
"""
Reload the controller or controller pair.

Args:
wait_for_reload (bool): Whether the reboot method should wait for the device to come back up before returning. Defaults to False.
timeout (int, optional): Time in seconds to wait for the device to return after reboot. Defaults to 1 hour.
timeout (int, optional): Time in seconds to wait for the device to return after reboot. Defaults to 2 hours.
confirm (None): Not used. Deprecated since v0.17.0.

Example:
Expand All @@ -522,9 +562,16 @@ def reboot(self, wait_for_reload=False, timeout=3600, confirm=None):
if confirm is not None:
warnings.warn("Passing 'confirm' to reboot method is deprecated.", DeprecationWarning)

self._uptime = None
original_uptime = self.uptime
Comment thread
Defiantearth marked this conversation as resolved.
if original_uptime is None:
raise CommandError(
command="reboot",
message="Could not determine pre-reboot uptime; refusing to wait for reload.",
)
self.sw.reboot(in_min=0)
if wait_for_reload:
self._wait_for_device_reboot(timeout=timeout)
self._wait_for_device_reboot(original_uptime, timeout=timeout)

def rollback(self, filename):
"""Rollback to a specific configuration file.
Expand Down
82 changes: 71 additions & 11 deletions tests/unit/test_devices/test_jnpr_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,18 +244,28 @@ def test_reboot(self):

@mock.patch("pyntc.devices.jnpr_device.time.sleep")
def test_wait_for_device_to_reboot(self, mock_sleep):
with mock.patch.object(self.device, "open") as mock_open:
# Emulate the device disconnected and reconnecting
type(self.device.native).connected = mock.PropertyMock(side_effect=[True, False, True])
mock_open.side_effect = [Exception, Exception, True]
self.device.reboot(wait_for_reload=True, timeout=3)
mock_open.assert_has_calls([mock.call()] * 3)
"""Reboot completes when the device returns with a lower uptime than the baseline."""
with (
mock.patch.object(self.device, "open") as mock_open,
mock.patch.object(self.device, "close"),
mock.patch.object(type(self.device), "uptime", new_callable=mock.PropertyMock) as mock_uptime,
):
# First read is the pre-reboot baseline; second is the post-reboot uptime.
mock_uptime.side_effect = [455, 30]
self.device.reboot(wait_for_reload=True, timeout=30)

self.device.sw.reboot.assert_called_with(in_min=0)
mock_open.assert_called()

@mock.patch("pyntc.devices.jnpr_device.time.sleep")
def test_wait_for_device_to_reboot_error(self, mock_sleep):
with mock.patch.object(self.device, "open") as mock_open:
type(self.device.native).connected = mock.PropertyMock(side_effect=[True, False])
mock_open.side_effect = Exception
"""Raise RebootTimeoutError when the device never reports a lower uptime within the timeout."""
with (
mock.patch.object(self.device, "open"),
mock.patch.object(self.device, "close"),
mock.patch.object(type(self.device), "uptime", new_callable=mock.PropertyMock) as mock_uptime,
):
mock_uptime.return_value = 455
with pytest.raises(RebootTimeoutError):
self.device.reboot(wait_for_reload=True, timeout=1)

Expand Down Expand Up @@ -290,12 +300,62 @@ def test_checkpoint(self, mock_scp):
self.device.show.assert_called_with("show config")

def test_uptime(self):
"""Cold cache (_uptime is None) refreshes facts and parses the uptime."""
self.assertIsNone(self.device._uptime)
uptime = self.device.uptime
self.assertEqual(uptime, 455)
self.device.native.facts_refresh.assert_called_once_with(keys="RE0")

def test_uptime_cached(self):
"""A populated cache is returned as-is, with no device round-trip."""
self.device._uptime = 1234
uptime = self.device.uptime
assert uptime == 455
self.assertEqual(uptime, 1234)
self.device.native.facts_refresh.assert_not_called()

def test_uptime_refreshes_after_cache_cleared(self):
"""Clearing the cache forces a fresh read."""
self.assertEqual(self.device.uptime, 455)

self.device._uptime = None
self.device.native.facts = {"RE0": {"up_time": "30 seconds"}}

self.assertEqual(self.device.uptime, 30)

def test_uptime_none_when_facts_unavailable(self):
"""Missing/unavailable facts return None gracefully instead of raising an Exception."""
self.device._uptime = None
self.device.native.facts = {}
self.assertIsNone(self.device.uptime)

def test_uptime_string(self):
"""Cold cache (_uptime_string is None) refreshes facts and formats the uptime."""
self.assertIsNone(self.device._uptime_string)
uptime_string = self.device.uptime_string
self.assertEqual(uptime_string, "00:00:07:35")
self.device.native.facts_refresh.assert_called_once_with(keys="RE0")

def test_uptime_string_cached(self):
"""A populated cache is returned as-is, with no device round-trip."""
self.device._uptime_string = "01:02:03:04"
uptime_string = self.device.uptime_string
assert uptime_string == "00:00:07:35"
self.assertEqual(uptime_string, "01:02:03:04")
self.device.native.facts_refresh.assert_not_called()

def test_uptime_string_refreshes_after_cache_cleared(self):
"""Clearing the cache forces a fresh read."""
self.assertEqual(self.device.uptime_string, "00:00:07:35")

self.device._uptime_string = None
self.device.native.facts = {"RE0": {"up_time": "30 seconds"}}

self.assertEqual(self.device.uptime_string, "00:00:00:30")

def test_uptime_string_none_when_facts_unavailable(self):
"""Missing/unavailable facts return None gracefully instead of raising an Exception."""
self.device._uptime_string = None
self.device.native.facts = {}
self.assertIsNone(self.device.uptime_string)

def test_vendor(self):
vendor = self.device.vendor
Expand Down
Loading