Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 79 additions & 4 deletions gator/common/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,13 @@ def get_username() -> str:
@overload
def as_couroutine(
fn: Callable[_P, Union[_R, Awaitable[_R]]],
) -> Callable[_P, Awaitable[_R]]:
...
) -> Callable[_P, Awaitable[_R]]: ...
except ImportError:

@overload
def as_couroutine(
fn: Callable[..., Union[_R, Awaitable[_R]]],
) -> Callable[..., Awaitable[_R]]:
...
) -> Callable[..., Awaitable[_R]]: ...


def as_couroutine(fn):
Expand All @@ -56,3 +54,80 @@ async def async_fn(*args, **kwargs):
return fn(*args, **kwargs)

return async_fn


def find_command_substitutions(text: str) -> list[tuple[int, int, str]]:
"""
Find all command substitutions $(cmd) and `cmd` in the text, handling
nested parentheses and quoted strings correctly.

Returns a list of (start_pos, end_pos, original_text) tuples.
"""
substitutions = []
i = 0

while i < len(text):
# Check for $(
if i < len(text) - 1 and text[i : i + 2] == "$(":
start = i
i += 2
depth = 1
in_single_quote = False
in_double_quote = False

# Find matching closing parenthesis, respecting quotes
while i < len(text) and depth > 0:
char = text[i]

# Handle backslash escaping (only in double quotes or outside quotes)
if char == "\\" and not in_single_quote and i + 1 < len(text):
i += 2 # Skip the backslash and next character
continue

# Handle single quotes (toggle state, but not inside double quotes)
if char == "'" and not in_double_quote:
in_single_quote = not in_single_quote
i += 1
continue

# Handle double quotes (toggle state, but not inside single quotes)
if char == '"' and not in_single_quote:
in_double_quote = not in_double_quote
i += 1
continue

# Only count parentheses when not inside any quotes
if not in_single_quote and not in_double_quote:
if char == "(":
depth += 1
elif char == ")":
depth -= 1

i += 1

if depth == 0:
# Found matching closing paren
substitutions.append((start, i, text[start:i]))
# else: unmatched - let it through and shell will error

# Check for backticks
elif text[i] == "`":
start = i
i += 1

# Find closing backtick (no nesting for backticks)
while i < len(text) and text[i] != "`":
# Handle escaped backticks
if text[i] == "\\" and i + 1 < len(text):
i += 2
else:
i += 1

if i < len(text) and text[i] == "`":
i += 1
substitutions.append((start, i, text[start:i]))
# else: unmatched - let it through
else:
i += 1

return substitutions
4 changes: 2 additions & 2 deletions gator/scheduler/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ async def _inner():
async with self.update_lock:
# Launch jobs
self.slots[task.ident] = granted
self.launched_processes[task.ident] = await asyncio.create_subprocess_shell(
" ".join(self.create_command(task, {"concurrency": granted})),
self.launched_processes[task.ident] = await asyncio.create_subprocess_exec(
*self.create_command(task, {"concurrency": granted}),
Comment on lines -106 to +107
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having re-read the asyncio documentation, I can see why an LLM check may have flagged this as a security vulnerability as it says create_subprocess_shell is at risk of injection vulnerabilities

However...

Given that the whole point of gator is that you can submit any arbitrary command through a job - I'd argue this is not a "security fix".

I'm in agreement with the change as there's still a benefit here in that a sub-shell is not invoked, which makes it slightly lighter on resources.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd mostly agree - I had the same initial view - and then I realised this subprocess invocation isn't supposed to be causing user-provided commands/args to be interpreted at this point on the host that runs this invocation. So, whether you think it's a security issue or not, it's a genuine problem that commands may end up running on a different machine / in a different place than the user may have intended or expected.

stdin=asyncio.subprocess.DEVNULL,
stdout=asyncio.subprocess.DEVNULL,
stderr=asyncio.subprocess.STDOUT,
Expand Down
19 changes: 19 additions & 0 deletions gator/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from .common.layer import BaseLayer, MetricResponse, UsageResponse
from .common.summary import Summary
from .common.types import Attribute, JobResult, LogSeverity, ProcStat
from .common.utility import find_command_substitutions


class Wrapper(BaseLayer):
Expand Down Expand Up @@ -246,6 +247,24 @@ async def __launch(self) -> None:
# Expand variables in the command
command = expandvars.expand(self.spec.command, environ=env)
args = [expandvars.expand(str(arg), environ=env) for arg in self.spec.args]
# Check for command substitutions and warn user they won't be evaluated (unless using shell)
common_shells = {"sh", "bash", "zsh", "ksh", "csh", "tcsh", "fish", "dash"}
is_shell_command = Path(command).name in common_shells
if not is_shell_command:
detected_substitutions = []
for substitution in find_command_substitutions(command):
detected_substitutions.append(substitution[2]) # Extract original_text
for arg in args:
for substitution in find_command_substitutions(arg):
detected_substitutions.append(substitution[2]) # Extract original_text
if detected_substitutions:
substitutions_list = "\n".join(f" {sub}" for sub in detected_substitutions)
await self.logger.warning(
"Gator only supports simple environment variable substitutions. "
"Command substitutions will pass through to the command without being "
"evaluated by Gator.\n"
f"Detected command substitutions:\n{substitutions_list}"
)
full_cmd = shlex.join((command, *args))
# Ensure the tracking directory exists
self.tracking.mkdir(parents=True, exist_ok=True)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ psutil = "^5.9.4"
rich = "^13.3.4"
tabulate = "^0.9.0"
pyyaml = "^6.0"
expandvars = "^0.9.0"
expandvars = "^1.1.2"
websockets = "^11.0.2"
aiosqlite = "^0.19.0"
aiohttp = "^3.12.13"
Expand Down
Loading
Loading