diff --git a/agent/src/testflinger_agent/job.py b/agent/src/testflinger_agent/job.py index 41b0e82ba..4f936247b 100644 --- a/agent/src/testflinger_agent/job.py +++ b/agent/src/testflinger_agent/job.py @@ -67,10 +67,15 @@ def __init__(self, job_data, client): ) def get_runner(self, rundir: str, phase: TestPhase): + output_polling_interval = self.client.config["output_polling_interval"] try: secrets = self.job_data[f"{phase}_data"]["secrets"] except KeyError: - return CommandRunner(cwd=rundir, env=self.client.config) + return CommandRunner( + cwd=rundir, + env=self.client.config, + output_polling_interval=output_polling_interval, + ) # inject phase secrets into the environment environment = { @@ -83,6 +88,7 @@ def get_runner(self, rundir: str, phase: TestPhase): return MaskingCommandRunner( cwd=rundir, env=environment, + output_polling_interval=output_polling_interval, masker=Masker( patterns=list(secrets.values()), hash_length=self._hash_length ), diff --git a/agent/src/testflinger_agent/runner.py b/agent/src/testflinger_agent/runner.py index 6f8ca5110..157c08287 100644 --- a/agent/src/testflinger_agent/runner.py +++ b/agent/src/testflinger_agent/runner.py @@ -48,13 +48,19 @@ class CommandRunner: known event types are defined in RunnerEvents. 
""" - def __init__(self, cwd: Optional[str], env: Optional[dict]): + def __init__( + self, + cwd: Optional[str], + env: Optional[dict], + output_polling_interval: float = 10.0, + ): self.output_handlers: List[OutputHandlerType] = [] self.stop_condition_checkers: List[StopConditionType] = [] self.process: Optional[subprocess.Popen] = None self.cwd = cwd self.env = os.environ.copy() self.events = defaultdict(list) + self.output_polling_interval = output_polling_interval if env: self.env.update( {k: str(v) for k, v in env.items() if isinstance(v, str)} @@ -136,7 +142,7 @@ def run(self, cmd: str) -> Tuple[int, Optional[TestEvent], str]: time.sleep(1) while self.process.poll() is None: - time.sleep(10) + time.sleep(self.output_polling_interval) stop_event, stop_reason = self.check_stop_conditions() if stop_event is not None: diff --git a/agent/tests/test_agent.py b/agent/tests/test_agent.py index 1009e8a6a..06b6d9e39 100644 --- a/agent/tests/test_agent.py +++ b/agent/tests/test_agent.py @@ -390,7 +390,8 @@ def test_phase_failed(self, agent, requests_mock): def test_phase_timeout(self, agent, requests_mock): # Make sure the status code of a timed-out phase is correct - self.config["test_command"] = "sleep 12" + self.config["output_polling_interval"] = 0.1 + self.config["test_command"] = "sleep 2" mock_job_data = { "job_id": str(uuid.uuid1()), "job_queue": "test", @@ -600,7 +601,8 @@ def test_post_agent_status_update_cancelled(self, agent, requests_mock): def test_post_agent_status_update_global_timeout( self, agent, requests_mock ): - self.config["test_command"] = "sleep 12" + self.config["output_polling_interval"] = 0.1 + self.config["test_command"] = "sleep 2" job_id = str(uuid.uuid1()) fake_job_data = { "job_id": job_id, @@ -637,7 +639,8 @@ def test_post_agent_status_update_global_timeout( def test_post_agent_status_update_output_timeout( self, agent, requests_mock ): - self.config["test_command"] = "sleep 12" + self.config["output_polling_interval"] = 0.1 + 
self.config["test_command"] = "sleep 2" job_id = str(uuid.uuid1()) fake_job_data = { "job_id": job_id, diff --git a/agent/tests/test_job.py b/agent/tests/test_job.py index a5de1f83d..4eb3be44f 100644 --- a/agent/tests/test_job.py +++ b/agent/tests/test_job.py @@ -76,12 +76,12 @@ def test_job_global_timeout(self, tmp_path): """Test that timeout from job_data is respected.""" timeout_str = "ERROR: Global timeout reached! (1s)" logfile = tmp_path / "testlog" - runner = CommandRunner(tmp_path, env={}) + runner = CommandRunner(tmp_path, env={}, output_polling_interval=0.1) log_handler = FileLogHandler(logfile) runner.register_output_handler(log_handler) global_timeout_checker = GlobalTimeoutChecker(1) runner.register_stop_condition_checker(global_timeout_checker) - exit_code, exit_event, exit_reason = runner.run("sleep 12") + exit_code, exit_event, exit_reason = runner.run("sleep 2") with open(logfile) as log: log_data = log.read() assert timeout_str in log_data @@ -101,14 +101,12 @@ def test_job_output_timeout(self, tmp_path): """Test that output timeout from job_data is respected.""" timeout_str = "ERROR: Output timeout reached! 
(1s)" logfile = tmp_path / "testlog" - runner = CommandRunner(tmp_path, env={}) + runner = CommandRunner(tmp_path, env={}, output_polling_interval=0.1) log_handler = FileLogHandler(logfile) runner.register_output_handler(log_handler) output_timeout_checker = OutputTimeoutChecker(1) runner.register_stop_condition_checker(output_timeout_checker) - # unfortunately, we need to sleep for longer that 10 seconds here - # or else we fall under the polling time - exit_code, exit_event, exit_reason = runner.run("sleep 12") + exit_code, exit_event, exit_reason = runner.run("sleep 2") with open(logfile) as log: log_data = log.read() assert timeout_str in log_data @@ -131,9 +129,8 @@ def test_no_output_timeout_in_provision( timeout_str = "complete\n" logfile = tmp_path / "provision.log" fake_job_data = {"output_timeout": 1, "provision_data": {"url": "foo"}} - self.config["provision_command"] = ( - "bash -c 'sleep 12 && echo complete'" - ) + self.config["output_polling_interval"] = 0.1 + self.config["provision_command"] = "bash -c 'sleep 2 && echo complete'" requests_mock.post(rmock.ANY, status_code=HTTPStatus.OK) job = _TestflingerJob(fake_job_data, client) job.phase = "provision" @@ -142,9 +139,6 @@ def test_no_output_timeout_in_provision( with open(tmp_path / "testflinger-outcome.json", "w") as outcome_file: outcome_file.write("{}") - # unfortunately, we need to sleep for longer that 10 seconds here - # or else we fall under the polling time - # job.run_with_log("sleep 12 && echo complete", logfile) job.run_test_phase("provision", tmp_path) with open(logfile) as log: log_data = log.read() diff --git a/docs/reference/testflinger-agent-conf.rst b/docs/reference/testflinger-agent-conf.rst index 8e102c848..9d9c47c4f 100644 --- a/docs/reference/testflinger-agent-conf.rst +++ b/docs/reference/testflinger-agent-conf.rst @@ -15,7 +15,9 @@ The following configuration options are supported by the Testflinger Agent: * - ``identifier`` - Additional identifier such as a serial number that 
will be sent to the server and can be used for cross-referencing with other systems * - ``polling_interval`` - - Time to sleep between polling for new tests (default: 10s) + - Time to sleep between polling the server for new tests (default: 10s) + * - ``output_polling_interval`` + - Time to sleep between polling for output from the process running the phase command; stop conditions such as timeouts are checked at the same interval (default: 10s) * - ``server_address`` - Host/IP and port of the Testflinger server * - ``execution_basedir``