Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 21 additions & 9 deletions examples/fetch-runner.service
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,23 @@ StandardError=journal

# NoNewPrivileges / RestrictSUIDSGID block sudo's setuid transition, so
# they are off here. The sudoers fragment is what bounds the privilege.
# If every job uses run_as = [general].user, you can re-enable both.
CapabilityBoundingSet=
# If every job uses run_as = [general].user, you can drop the capability
# list below to empty and set NoNewPrivileges=true and RestrictSUIDSGID=true.
#
# The bounding set must permit the caps sudo needs to switch uid/gid and
# initialize its audit plugin.
CapabilityBoundingSet=CAP_SETUID CAP_SETGID CAP_AUDIT_WRITE CAP_SYS_RESOURCE CAP_DAC_READ_SEARCH CAP_CHOWN CAP_FOWNER

# AmbientCapabilities stays empty — sudo gains privilege via its setuid-root
# binary, not via ambient inheritance. The bounding set above (which includes
# CAP_SETUID) permits it to keep the caps it needs after the setuid exec.
AmbientCapabilities=

# Keep the filesystem read-only by default, then punch narrow write holes back
# in with `ReadWritePaths=` above for the repos and app state that deployments
# genuinely need to modify.
ProtectSystem=strict
# Keep /usr, /boot, /efi, and /etc read-only, then punch narrow write holes
# back in with `ReadWritePaths=` above for the repos and app state that
# deployments genuinely need to modify. Using `strict` breaks the setuid
# transition because sudo needs to write runtime state under /run and /var.
ProtectSystem=full

# Deploy scripts should not need the service user's home directory.
ProtectHome=read-only
Expand All @@ -69,9 +78,12 @@ ProtectControlGroups=true
ProtectClock=true
ProtectHostname=true

# Hide unrelated processes from the service; deploy hooks should not be
# inspecting the rest of the machine.
ProtectProc=invisible
# Left at `default` (no hiding) because `invisible` makes /proc/1 unreadable
# to the sandbox, and PAM (via sudo) then logs a stream of "Could not read
# /proc/1/limits" warnings on every fetch. Tighten to `invisible` if you can
# live with the noise — deploy hooks have no legitimate need to inspect
# other processes.
ProtectProc=default

# Kernel / namespace hardening.
LockPersonality=true
Expand Down
4 changes: 4 additions & 0 deletions examples/jobs.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
# runs its git ops and script. Defaults to [general].user. When set,
# sudo is used and a sudoers rule is required
# (see `fetch-runner --print-sudoers <this-file>`).
# [[jobs]].branch (optional) — fixed branch to track. Omit to follow
# whatever branch is currently checked out in the working tree, so an
# operator can `git checkout` a different branch and fetch-runner will
# start tracking it on the next poll. Detached HEAD is skipped.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've always felt that "detached HEAD" was a gruesome way to describe this function of git.

#
# Convention: each repo lives at /srv/<run_as>/<repo_name>/.

Expand Down
45 changes: 27 additions & 18 deletions src/fetch_runner/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ class ConfigError(Exception):
class ConfiguredJob:
name: str
repo_path: Path
branch_name: str
# None means "follow whatever branch is currently checked out in the
# working tree" — resolved live on each poll. Useful for dev deployments
# where the operator switches branches manually.
branch_name: str | None
script_path: Path
script_timeout_seconds: int | None
# The user this job's git ops and script run as. Defaults to
Expand Down Expand Up @@ -123,23 +126,29 @@ def load_config(config_path: Path) -> RunnerConfig:
f"{config_path}: {section_label}.path {repo_path} is not a git repository"
)

branch_name = _require_non_empty_string(
raw_job_section,
"branch",
section_label,
config_path,
)
# Branch names are passed as argv entries, but git still interprets
# leading dashes and a wide range of refname syntax. A conservative
# character filter keeps the allowed surface area easy to reason about.
if branch_name.startswith("-") or any(
char in _DISALLOWED_BRANCH_CHARACTERS for char in branch_name
):
raise ConfigError(
f"{config_path}: {section_label}.branch contains unsafe characters: {branch_name!r}"
)
if len(branch_name) > 128:
raise ConfigError(f"{config_path}: {section_label}.branch too long")
# Omitting `branch` means "follow the current checked-out branch" —
# resolved live on each poll. Present-but-empty is still rejected.
raw_branch = raw_job_section.get("branch")
if raw_branch is None:
branch_name: str | None = None
else:
if not isinstance(raw_branch, str) or not raw_branch:
raise ConfigError(
f"{config_path}: {section_label}.branch must be a non-empty string"
)
branch_name = raw_branch
# Branch names are passed as argv entries, but git still interprets
# leading dashes and a wide range of refname syntax. A conservative
# character filter keeps the allowed surface area easy to reason about.
if branch_name.startswith("-") or any(
char in _DISALLOWED_BRANCH_CHARACTERS for char in branch_name
):
raise ConfigError(
f"{config_path}: {section_label}.branch contains unsafe characters: "
f"{branch_name!r}"
)
if len(branch_name) > 128:
raise ConfigError(f"{config_path}: {section_label}.branch too long")

script_path = Path(
_require_non_empty_string(raw_job_section, "script", section_label, config_path)
Expand Down
19 changes: 19 additions & 0 deletions src/fetch_runner/git_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,25 @@ def _run_git_command(
return result.stdout.strip()


def git_get_current_branch(
    repo_path: Path,
    *,
    run_as_user_name: str,
) -> str | None:
    """Look up the branch that HEAD currently refers to.

    Runs ``git symbolic-ref --quiet --short HEAD`` through
    ``_run_git_command`` and hands back that call's result. If the call
    raises ``GitError`` (for example because HEAD is detached), ``None``
    is returned instead of propagating the error.
    """
    symbolic_ref_argv = ("symbolic-ref", "--quiet", "--short", "HEAD")
    try:
        current_branch = _run_git_command(
            repo_path,
            *symbolic_ref_argv,
            run_as_user_name=run_as_user_name,
        )
    except GitError:
        # Callers treat None as "no branch to act on", so the failure is
        # reported through the return value rather than an exception.
        return None
    return current_branch


def git_get_local_branch_commit_sha(
repo_path: Path,
branch_name: str,
Expand Down
42 changes: 35 additions & 7 deletions src/fetch_runner/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from .git_ops import GitError
from .git_ops import git_fetch_branch_from_origin
from .git_ops import git_force_checkout_branch_to_commit
from .git_ops import git_get_current_branch
from .git_ops import git_get_local_branch_commit_sha
from .guard import render_sudo_argv
from .guard import validate_canonical_script_guard
Expand Down Expand Up @@ -63,34 +64,60 @@ def run_forever(self) -> int:

def _initialize_last_processed_commits(self) -> None:
    """Seed the per-job "last processed commit" cursor for every job.

    For each configured job the branch is resolved first. When it cannot
    be resolved, or when reading the local branch tip raises ``GitError``,
    the cursor is stored as an empty string. Otherwise the cursor is set
    to the current local branch tip.
    """
    for configured_job in self.runner_config.jobs:
        resolved_branch = self._resolve_branch_for_job(configured_job)
        if resolved_branch is None:
            # Dynamic-branch job whose working tree is detached or
            # unreadable at startup. Leave the cursor empty; the first
            # successful poll will seed it.
            self._last_processed_commit_by_job_name[configured_job.name] = ""
            log.info("job %s: initial commit <unresolved>", configured_job.name)
            continue
        try:
            # Seed each job from the current local branch tip so a service
            # restart does not replay the last successfully fetched commit.
            initial_commit_sha = git_get_local_branch_commit_sha(
                configured_job.repo_path,
                resolved_branch,
                run_as_user_name=configured_job.run_as_user,
            )
        except GitError as e:
            # Log against the branch that was actually queried; for a
            # dynamic-branch job the configured branch name is None.
            log.warning(
                "job %s: cannot read initial commit for %s: %s",
                configured_job.name,
                resolved_branch,
                e,
            )
            initial_commit_sha = ""
        self._last_processed_commit_by_job_name[configured_job.name] = initial_commit_sha
        log.info(
            "job %s: initial commit %s on %s",
            configured_job.name,
            _short_commit_sha(initial_commit_sha),
            resolved_branch,
        )

def _resolve_branch_for_job(self, configured_job: ConfiguredJob) -> str | None:
    """Pick the branch the current poll should operate on.

    A job with a configured branch always uses that branch. A job without
    one is resolved live via ``git_get_current_branch``, which yields
    ``None`` when the working tree is detached or cannot be resolved.
    """
    pinned_branch = configured_job.branch_name
    if pinned_branch is None:
        # Dynamic-branch job: ask git which branch is checked out right now.
        return git_get_current_branch(
            configured_job.repo_path,
            run_as_user_name=configured_job.run_as_user,
        )
    return pinned_branch

def _poll_job_for_new_commit(self, configured_job: ConfiguredJob) -> None:
resolved_branch = self._resolve_branch_for_job(configured_job)
if resolved_branch is None:
log.warning(
"job %s: cannot resolve current branch (detached HEAD?); skipping",
configured_job.name,
)
return
try:
fetched_commit_sha = git_fetch_branch_from_origin(
configured_job.repo_path,
configured_job.branch_name,
resolved_branch,
run_as_user_name=configured_job.run_as_user,
)
except GitError as e:
Expand All @@ -116,7 +143,7 @@ def _poll_job_for_new_commit(self, configured_job: ConfiguredJob) -> None:
try:
git_force_checkout_branch_to_commit(
configured_job.repo_path,
configured_job.branch_name,
resolved_branch,
fetched_commit_sha,
run_as_user_name=configured_job.run_as_user,
)
Expand All @@ -142,20 +169,21 @@ def _poll_job_for_new_commit(self, configured_job: ConfiguredJob) -> None:
# action, not an automatic tight loop.
self._last_processed_commit_by_job_name[configured_job.name] = fetched_commit_sha
return
self._run_job_script_for_commit(configured_job, fetched_commit_sha)
self._run_job_script_for_commit(configured_job, resolved_branch, fetched_commit_sha)
self._last_processed_commit_by_job_name[configured_job.name] = fetched_commit_sha

def _run_job_script_for_commit(
self,
configured_job: ConfiguredJob,
branch_name: str,
commit_sha: str,
) -> None:
# Export execution context so scripts can log or branch on it without
# having to re-run git commands against the working tree.
script_environment = {
**os.environ,
"FETCH_RUNNER_JOB": configured_job.name,
"FETCH_RUNNER_BRANCH": configured_job.branch_name,
"FETCH_RUNNER_BRANCH": branch_name,
"FETCH_RUNNER_COMMIT": commit_sha,
"FETCH_RUNNER_REPO": str(configured_job.repo_path),
}
Expand Down
21 changes: 21 additions & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,27 @@ def test_load_happy_path(tmp_path: Path):
assert runner_config.jobs[0].script_timeout_seconds is None


def test_load_allows_branch_to_be_omitted(tmp_path: Path):
    # A [[jobs]] entry without a `branch` key must load, leaving
    # branch_name as None.
    user_name = get_current_real_uid_user_name()
    repo_path = _create_repo_directory(tmp_path / "repo")
    script_path = _create_guarded_script(tmp_path / "deploy.sh", user_name)
    jobs_toml_text = f"""
[general]
user = "{user_name}"
poll_interval_seconds = 30

[[jobs]]
name = "j1"
path = "{repo_path}"
script = "{script_path}"
"""
    config_path = _write_jobs_toml(tmp_path / "jobs.toml", jobs_toml_text)
    first_job = load_config(config_path).jobs[0]
    assert first_job.branch_name is None


def test_load_rejects_wrong_user(tmp_path: Path):
# A jobs.toml whose user does not match the running user must be refused.
repo_path = _create_repo_directory(tmp_path / "repo")
Expand Down
6 changes: 3 additions & 3 deletions tests/test_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_runner_invokes_script_directly_when_run_as_matches_runtime_user(tmp_pat
runner, job = _make_runner(run_as_user=current_user_name, script_path=script_path)
with mock.patch("fetch_runner.runner.subprocess.run") as mocked_subprocess_run:
mocked_subprocess_run.return_value = mock.Mock(returncode=0)
runner._run_job_script_for_commit(job, "deadbeef" * 5)
runner._run_job_script_for_commit(job, "main", "deadbeef" * 5)
invocation_argv = mocked_subprocess_run.call_args.args[0]
assert invocation_argv == [str(script_path)]

Expand All @@ -59,7 +59,7 @@ def test_runner_invokes_script_via_sudo_when_run_as_differs(tmp_path: Path):
runner, job = _make_runner(run_as_user="someone-else", script_path=script_path)
with mock.patch("fetch_runner.runner.subprocess.run") as mocked_subprocess_run:
mocked_subprocess_run.return_value = mock.Mock(returncode=0)
runner._run_job_script_for_commit(job, "cafef00d" * 5)
runner._run_job_script_for_commit(job, "main", "cafef00d" * 5)
invocation_argv = mocked_subprocess_run.call_args.args[0]
assert invocation_argv[0] == "sudo"
assert invocation_argv[1] == "-n"
Expand All @@ -75,7 +75,7 @@ def test_runner_passes_fetch_runner_env_vars_through_subprocess(tmp_path: Path):
runner, job = _make_runner(run_as_user="someone-else", script_path=script_path)
with mock.patch("fetch_runner.runner.subprocess.run") as mocked_subprocess_run:
mocked_subprocess_run.return_value = mock.Mock(returncode=0)
runner._run_job_script_for_commit(job, "1234567890ab")
runner._run_job_script_for_commit(job, "main", "1234567890ab")
passed_env = mocked_subprocess_run.call_args.kwargs["env"]
assert passed_env["FETCH_RUNNER_JOB"] == "j"
assert passed_env["FETCH_RUNNER_BRANCH"] == "main"
Expand Down