-
Notifications
You must be signed in to change notification settings - Fork 3.2k
Add lazy loading for io counter to prevent crash on package import #45275
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -48,10 +48,8 @@ | |
| # _PROCESS.io_counters() is not available on Mac OS and some Linux distros. | ||
| _IO_AVAILABLE = hasattr(_PROCESS, "io_counters") | ||
| _IO_LAST_COUNT = 0 | ||
| if _IO_AVAILABLE: | ||
| _io_counters_initial = _PROCESS.io_counters() | ||
| _IO_LAST_COUNT = _io_counters_initial.read_bytes + _io_counters_initial.write_bytes | ||
| _IO_LAST_TIME = datetime.now() | ||
| _IO_INITIALIZED = False | ||
| # Processor Time % | ||
| _LAST_CPU_TIMES = psutil.cpu_times() | ||
| # Request Rate | ||
|
|
@@ -168,25 +166,55 @@ def _get_process_io(options: CallbackOptions) -> Iterable[Observation]: | |
| :rtype: ~typing.Iterable[~opentelemetry.metrics.Observation] | ||
| """ | ||
| try: | ||
| if not _IO_AVAILABLE: | ||
| yield Observation(0, {}) | ||
| return | ||
| # pylint: disable=global-statement | ||
| global _IO_LAST_COUNT | ||
| # pylint: disable=global-statement | ||
| global _IO_LAST_TIME | ||
| # RSS is non-swapped physical memory a process has used | ||
| io_counters = _PROCESS.io_counters() | ||
| rw_count = io_counters.read_bytes + io_counters.write_bytes | ||
| # pylint: disable=global-statement | ||
| global _IO_INITIALIZED | ||
| # pylint: disable=global-statement | ||
| global _IO_AVAILABLE | ||
|
|
||
| # Lazily initialize counters to avoid import-time failures in restricted environments. | ||
| if not _IO_INITIALIZED: | ||
| _IO_INITIALIZED = True | ||
| if _IO_AVAILABLE: | ||
| io_counters_func = getattr(_PROCESS, "io_counters", None) | ||
| if not callable(io_counters_func): | ||
| _IO_AVAILABLE = False | ||
| yield Observation(0, {}) | ||
| return | ||
| io_counters_initial = io_counters_func() | ||
| _IO_LAST_COUNT = int(getattr(io_counters_initial, "read_bytes", 0)) + int( | ||
| getattr(io_counters_initial, "write_bytes", 0) | ||
| ) | ||
| _IO_LAST_TIME = datetime.now() | ||
|
|
||
| if not _IO_AVAILABLE: | ||
| yield Observation(0, {}) | ||
| return | ||
|
|
||
| io_counters_func = getattr(_PROCESS, "io_counters", None) | ||
| if not callable(io_counters_func): | ||
| _IO_AVAILABLE = False | ||
| yield Observation(0, {}) | ||
| return | ||
|
|
||
| io_counters = io_counters_func() | ||
| rw_count = int(getattr(io_counters, "read_bytes", 0)) + int(getattr(io_counters, "write_bytes", 0)) | ||
|
Comment on lines
+178
to
+204
|
||
| rw_diff = rw_count - _IO_LAST_COUNT | ||
| _IO_LAST_COUNT = rw_count | ||
| current_time = datetime.now() | ||
| elapsed_time_s = (current_time - _IO_LAST_TIME).total_seconds() | ||
| _IO_LAST_TIME = current_time | ||
| if elapsed_time_s <= 0: | ||
| yield Observation(0, {}) | ||
| return | ||
| io_rate = rw_diff / elapsed_time_s | ||
| yield Observation(io_rate, {}) | ||
| except (psutil.NoSuchProcess, psutil.AccessDenied, Exception) as e: # pylint: disable=broad-except | ||
| _logger.exception("Error getting process I/O rate: %s", e) | ||
| _IO_AVAILABLE = False | ||
| _logger.debug("Disabling process I/O counter due to inaccessible io_counters: %s", e) | ||
| yield Observation(0, {}) | ||
|
Comment on lines
215
to
218
|
||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -57,6 +57,7 @@ def setUp(self): | |
| manager_module._IO_AVAILABLE = True | ||
| manager_module._IO_LAST_COUNT = 0 | ||
| manager_module._IO_LAST_TIME = datetime.now() | ||
| manager_module._IO_INITIALIZED = False | ||
| manager_module._REQUESTS_COUNT = 0 | ||
| manager_module._EXCEPTIONS_COUNT = 0 | ||
| manager_module._LAST_REQUEST_RATE_TIME = datetime.now() | ||
|
|
@@ -167,6 +168,7 @@ def test_get_process_io_success(self, mock_process, mock_datetime): | |
| # Import and modify global variables | ||
| import azure.monitor.opentelemetry.exporter._performance_counters._manager as manager_module | ||
|
|
||
| manager_module._IO_INITIALIZED = True | ||
| manager_module._IO_LAST_COUNT = 3000 # Previous total | ||
| manager_module._IO_LAST_TIME = start_time | ||
|
|
||
|
|
@@ -193,6 +195,7 @@ def test_get_process_io_unavailable(self, mock_process, mock_datetime): | |
| import azure.monitor.opentelemetry.exporter._performance_counters._manager as manager_module | ||
|
|
||
| manager_module._IO_AVAILABLE = 0 # Previous total | ||
| manager_module._IO_INITIALIZED = True | ||
| manager_module._IO_LAST_COUNT = 0 # Previous total | ||
| manager_module._IO_LAST_TIME = start_time | ||
|
|
||
|
|
@@ -201,6 +204,37 @@ def test_get_process_io_unavailable(self, mock_process, mock_datetime): | |
| self.assertEqual(len(result), 1) | ||
| self.assertAlmostEqual(result[0].value, 0.0) | ||
|
|
||
| @mock.patch("azure.monitor.opentelemetry.exporter._performance_counters._manager._PROCESS") | ||
| def test_get_process_io_access_denied_disables_io(self, mock_process): | ||
| """Test process I/O retrieval disables I/O counters when access is denied.""" | ||
| mock_process.io_counters.side_effect = psutil.AccessDenied(1) | ||
|
|
||
| import azure.monitor.opentelemetry.exporter._performance_counters._manager as manager_module | ||
|
|
||
| manager_module._IO_AVAILABLE = True | ||
| manager_module._IO_INITIALIZED = False | ||
|
|
||
| result = list(_get_process_io(None)) | ||
|
|
||
| self.assertEqual(len(result), 1) | ||
| self.assertEqual(result[0].value, 0) | ||
| self.assertFalse(manager_module._IO_AVAILABLE) | ||
| mock_process.io_counters.assert_called_once_with() | ||
|
|
||
| @mock.patch("azure.monitor.opentelemetry.exporter._performance_counters._manager._PROCESS") | ||
| def test_get_process_io_disabled_does_not_call_io_counters(self, mock_process): | ||
| """Test process I/O retrieval does not call io_counters when unavailable.""" | ||
| import azure.monitor.opentelemetry.exporter._performance_counters._manager as manager_module | ||
|
|
||
| manager_module._IO_AVAILABLE = False | ||
| manager_module._IO_INITIALIZED = True | ||
|
|
||
| result = list(_get_process_io(None)) | ||
|
|
||
| self.assertEqual(len(result), 1) | ||
| self.assertEqual(result[0].value, 0) | ||
| mock_process.io_counters.assert_not_called() | ||
|
Comment on lines
+207
to
+236
|
||
|
|
||
| @mock.patch("psutil.cpu_times") | ||
| def test_get_processor_time_success(self, mock_cpu_times): | ||
| """Test successful processor time retrieval.""" | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not just add a try catch around this initial call, and if it fails, set _IO_AVAILABLE to false. That would make this a ~3 line change.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@tbequinor I’ll open a separate PR with this fix and follow up once it’s included in a new release. We can close this PR for now, and I’ll create an issue to track the change. Link to the issue - #45283