Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
ba60141
Add GroupPull edge
d3vzer0 Apr 22, 2026
2047964
ManagerOf should be traversable
d3vzer0 Apr 22, 2026
3a0cee1
Store application_secrets and resources in DuckDB
d3vzer0 Apr 22, 2026
7cbb80c
No need to store the full API response for ApplicationSecrets
d3vzer0 Apr 22, 2026
a703728
Add ReadClientSecret edge for user role assignments + removed old TODO's
d3vzer0 Apr 22, 2026
214c6dc
Add ReadClientSecret edge for client role assignments + removed old/u…
d3vzer0 Apr 22, 2026
78c1445
Add ReadClientSecret edge for group role assignments
d3vzer0 Apr 22, 2026
8ae8966
Added additional lookups and placeholders for scoped resource sets (f…
d3vzer0 Apr 22, 2026
1aa53b2
(Auto) styling fixes
d3vzer0 Apr 22, 2026
5359441
Added _links to Pydantic model for Resource data
d3vzer0 Apr 22, 2026
283c1e3
Revert the change which removed the secret_hash from the ApplicationS…
d3vzer0 Apr 22, 2026
51b25a2
Revert the scoped Okta_AppAdmin edge
d3vzer0 Apr 22, 2026
e3a1202
Revert Okta_MembershipSync edge for Okta Org2Org
d3vzer0 Apr 22, 2026
6bb81b5
Add scope for helpdesk_admin edge(s)
d3vzer0 Apr 28, 2026
674772e
Add scope for helpdesk_admin edge(s)
d3vzer0 Apr 28, 2026
4d70be4
Add scope for app_admin edge(s)
d3vzer0 Apr 28, 2026
4cf71ea
Updated edge descriptions that are still work in progress
d3vzer0 Apr 28, 2026
9b85250
Remove unused key generation code
d3vzer0 Apr 28, 2026
c38b238
Add ORG_ADMIN edges to client and group role assignments
d3vzer0 Apr 28, 2026
711790d
Verified AD->Okta sync
d3vzer0 Apr 28, 2026
c859d48
Match by agent name, strip out the initial dash
d3vzer0 Apr 28, 2026
27baaaf
Verified TODOs
d3vzer0 Apr 28, 2026
e7efa76
Disable kerberos_sso_edge generation for future update
d3vzer0 Apr 29, 2026
4d8b078
Added tenant_domain property based on the tenant_url from config
d3vzer0 Apr 29, 2026
8c991f9
Remove debug prints
d3vzer0 Apr 29, 2026
11f9d3f
Create Membership sync edge between okta group and okta group (org2org)
d3vzer0 Apr 29, 2026
a6f2ca3
Add helpers for generating shared (ie. originating from multiple edge…
d3vzer0 Apr 29, 2026
17e06f5
Fixed scoping/rr lookups for add_member edges
d3vzer0 Apr 29, 2026
775823e
Added multiple conditions for the org2org okta group membership sync …
d3vzer0 Apr 29, 2026
b3615ba
The ID field for asset type `resource` should be an optional field
d3vzer0 Apr 30, 2026
c2a86a8
Create base RoleAssignment class that client, group and user role ass…
d3vzer0 Apr 30, 2026
f94232d
Create base RoleAssignment class that client, group and user role ass…
d3vzer0 Apr 30, 2026
5efb9e7
Fixed hybrid matching value for agents
d3vzer0 Apr 30, 2026
5b73e8a
Remove collected=true property from nodes that don't require it
d3vzer0 May 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/openhound_okta/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
@dataclass
class OktaNodeProperties(BaseProperties):
    # Base property set for Okta graph nodes; adds three required string
    # fields on top of BaseProperties.

    # Tenant identifier (the node builders shown in this change pass the
    # lookup's org_id() here).
    tenant: str
    # Tenant domain (the node builders shown in this change pass the
    # converter extras' "tenant" entry here).
    tenant_domain: str
    # Identifier of the object the node represents.
    id: str


Expand Down
83 changes: 74 additions & 9 deletions src/openhound_okta/lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,34 @@ def has_role_permission(self, role_id: str, permission: str) -> bool:
f"""SELECT label FROM {self.schema}.custom_role_permissions WHERE role_id = ? AND label = ?""",
[role_id, permission],
)
return res is not None
return res

@lru_cache
def application_by_id(self, app_id: str):
    """Look up an application by its ID.

    Args:
        app_id: Value matched against the ``id`` column of the
            ``applications`` table.

    Returns:
        The result of ``_find_single_object``, passed through unchanged.
        This is not a ``bool`` (the previous annotation was wrong);
        presumably a single row, or ``None`` when nothing matches --
        confirm against the lookup base class.
    """
    return self._find_single_object(
        f"""SELECT id FROM {self.schema}.applications WHERE id = ?""",
        [app_id],
    )

@lru_cache
def application_settings(self, app_id: str):
    """Fetch the ``settings`` column of one application.

    Args:
        app_id: Value matched against the ``id`` column of the
            ``applications`` table.

    Returns:
        The result of ``_find_single_object``, passed through unchanged.
        This is not a ``bool`` (the previous annotation was wrong);
        presumably a single row holding the settings, or ``None`` when
        nothing matches -- confirm against the lookup base class.
    """
    return self._find_single_object(
        f"""SELECT settings FROM {self.schema}.applications WHERE id = ?""",
        [app_id],
    )

@lru_cache
def all_groups(self):
    """Return the ``id`` of every row in the ``groups`` table.

    The result of ``_find_all_objects`` is passed through unchanged.
    """
    query = f"""SELECT id FROM {self.schema}.groups"""
    return self._find_all_objects(query)

@lru_cache
def non_admin_groups(self):
    """Return the ``id`` of every row in ``non_admin_groups``.

    The result of ``_find_all_objects`` is passed through unchanged.
    """
    query = f"""SELECT id FROM {self.schema}.non_admin_groups"""
    return self._find_all_objects(query)

@lru_cache
def all_users(self):
res = self._find_all_objects(f"""SELECT id FROM {self.schema}.users""")
Expand All @@ -43,6 +64,57 @@ def all_applications(self):
res = self._find_all_objects(f"""SELECT id FROM {self.schema}.applications""")
return res

@lru_cache
def application_ids_by_name(self, app_name: str):
    """Return the ``id`` of every application whose ``name`` equals *app_name*.

    Args:
        app_name: Value compared against the ``name`` column.

    Returns:
        The result of ``_find_all_objects``, passed through unchanged.
    """
    query = f"""SELECT id FROM {self.schema}.applications WHERE name = ?"""
    params = [app_name]
    return self._find_all_objects(query, params)

@lru_cache
def application_secret_ids(self, app_id: str):
    """Return the ``id`` of every stored secret belonging to an application.

    Args:
        app_id: Value compared against the ``app_id`` column of the
            ``application_secrets`` table.

    Returns:
        The result of ``_find_all_objects``, passed through unchanged.
    """
    query = f"""SELECT id FROM {self.schema}.application_secrets WHERE app_id = ?"""
    params = [app_id]
    return self._find_all_objects(query, params)

@lru_cache
def resource_set_application_ids(self, resource_set_id: str):
    """Return the application IDs covered by a resource set.

    Delegates to ``_resource_set_resource_ids`` with the ``"apps"`` resource
    type, supplying every known application row for the "all apps" case.
    """
    every_application = self.all_applications()
    return self._resource_set_resource_ids(
        resource_set_id, "apps", every_application
    )

@lru_cache
def resource_set_group_ids(self, resource_set_id: str):
    """Return the group IDs covered by a resource set.

    Delegates to ``_resource_set_resource_ids`` with the ``"groups"``
    resource type, supplying every known group row for the "all groups" case.
    """
    every_group = self.all_groups()
    return self._resource_set_resource_ids(resource_set_id, "groups", every_group)

@lru_cache
def resource_set_non_admin_group_ids(self, resource_set_id: str):
    """Return the resource set's group IDs that are also non-admin groups.

    Returns:
        A sorted tuple holding the IDs present both in
        ``resource_set_group_ids(resource_set_id)`` and in
        ``non_admin_groups()``.
    """
    scoped_ids = set(self.resource_set_group_ids(resource_set_id))
    non_admin_ids = set()
    for (group_id,) in self.non_admin_groups():
        non_admin_ids.add(group_id)
    shared_ids = [group_id for group_id in scoped_ids if group_id in non_admin_ids]
    shared_ids.sort()
    return tuple(shared_ids)

def _resource_set_resource_ids(
    self, resource_set_id: str, resource_type: str, all_resource_rows
):
    """Resolve the IDs of one resource type that a resource set covers.

    The ORNs stored for the resource set are split on ``":"`` and the fifth
    segment is compared with *resource_type*:

    * ``...:<type>`` (5 segments) covers every resource of that type, so all
      IDs from *all_resource_rows* are included.
    * ``...:<type>:<id>`` (6 segments, non-app types) names one resource.
    * ``...:apps:<appType>`` (6 segments) covers every application of that
      type; it is expanded through ``application_ids_by_name``.
      NOTE(review): assumes the ORN app type equals the application ``name``
      column -- confirm against collected data.
    * ``...:apps:<appType>:<appId>`` (7 segments) names one application.

    Any other shape (for example ``...:groups:<id>:contained_resources``)
    is ignored.

    Args:
        resource_set_id: Resource set whose ``resources`` rows are read.
        resource_type: ORN type segment to resolve, e.g. ``"apps"`` or
            ``"groups"``.
        all_resource_rows: Iterable of one-element rows holding every known
            ID of that type; used for the "all resources" ORN.

    Returns:
        A sorted tuple of unique resource IDs.
    """
    rows = self._find_all_objects(
        f"""SELECT orn FROM {self.schema}.resources WHERE resource_set_id = ? AND contains(orn, ?)""",
        [resource_set_id, f":{resource_type}"],
    )

    resource_ids: set[str] = set()
    for (orn,) in rows:
        segments = orn.split(":")
        # The SQL filter is only a substring match; require the type segment
        # to sit in its fixed position before trusting the rest of the ORN.
        if len(segments) < 5 or segments[4] != resource_type:
            continue

        if len(segments) == 5:
            resource_ids.update(resource_id for (resource_id,) in all_resource_rows)
        elif resource_type == "apps":
            # App ORNs carry the app type before the optional app ID, so the
            # sixth segment is never an application ID.
            if len(segments) == 6:
                resource_ids.update(
                    app_id
                    for (app_id,) in self.application_ids_by_name(segments[5])
                )
            elif len(segments) == 7:
                resource_ids.add(segments[6])
        elif len(segments) == 6:
            resource_ids.add(segments[5])
    return tuple(sorted(resource_ids))

@lru_cache
def all_policies(self):
res = self._find_all_objects(f"""SELECT id FROM {self.schema}.policies""")
Expand Down Expand Up @@ -70,14 +142,7 @@ def all_devices(self):
@lru_cache
def manager_id(self, manager_login: str):
    """Look up the user whose profile login equals *manager_login*.

    Only the current query is issued; the superseded ``json_value`` query
    that was left beside it has been dropped so that a single SQL string and
    a single parameter list are passed to the lookup helper.

    Args:
        manager_login: Login compared against ``$.login`` inside the JSON
            ``profile`` column of the ``users`` table.

    Returns:
        The result of ``_find_single_object``, passed through unchanged.
    """
    return self._find_single_object(
        f"""SELECT id FROM {self.schema}.users WHERE json_extract_string(profile, '$.login') = ?""",
        [manager_login],
    )

# @lru_cache
# def group_member_ids(self, group_id: str):
# res = self._find_all_objects(
# f"""SELECT id FROM {self.schema}.group_memberships WHERE group_id = ?""",
# [group_id]
# return res
14 changes: 8 additions & 6 deletions src/openhound_okta/main.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import dlt
from dlt.extract.source import DltSource
from openhound.core.app import OpenHound
from openhound.core.collect import CollectContext
from openhound.core.convert import ConvertContext
from openhound.core.preproc import PreProcContext

from openhound_okta.transforms import transforms
from openhound_okta.lookup import OktaLookup
from openhound_okta.transforms import transforms

app = OpenHound("okta", source_kind="Okta", help="OpenGraph collector for Okta")

Expand All @@ -23,15 +24,16 @@ def collect(ctx: CollectContext) -> DltSource:


@app.convert(lookup=OktaLookup)
def convert(ctx: ConvertContext):
    """Register a Typer CLI command that converts previously collected Okta resources into OpenGraph nodes and edges.

    Args:
        ctx (ConvertContext): DLT pipeline context.

    Returns:
        A tuple of the Okta DLT source and an extras dict whose ``"tenant"``
        entry is the network-location part of the configured base URL.
    """
    from urllib.parse import urlparse

    from openhound_okta.source import source as okta_source

    tenant_url = dlt.secrets.get("sources.source.okta.credentials.base_url")
    # NOTE(review): urlparse only fills netloc when the URL carries a scheme
    # (or starts with "//"); a bare "example.okta.com" yields an empty tenant.
    return okta_source(), {"tenant": urlparse(tenant_url).netloc}


@app.preproc(transformer=transforms)
Expand All @@ -41,14 +43,14 @@ def preprocess(ctx: PreProcContext):
"users": "users",
"groups": "groups",
"applications": "applications",
"application_secrets": "application_secrets",
"devices": "devices",
"authorization_servers": "authorization_servers",
"identity_providers": "identity_providers",
"policies": "policies",
"resources": "resources",
"user_role_assignments": "user_role_assignments",
"group_role_assignments": "group_role_assignments",
"client_role_assignments": "client_role_assignments",
"custom_role_permissions": "custom_role_permissions",
# "application_secrets": "application_secrets",
# "custom_role_permissions": "custom_role_permissions",
}
8 changes: 6 additions & 2 deletions src/openhound_okta/models/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def as_node(self):
kinds=[nk.AGENT],
properties=AgentProperties(
tenant=self._lookup.org_id(),
tenant_domain=self._extras["tenant"],
id=self.id,
name=self.name,
displayname=self.name,
Expand All @@ -95,10 +96,13 @@ def as_node(self):

@property
def _hosts_agent_edge(self):
# TODO: It seems that the agent name in Okta has TEST- prepended. Check the conditional logic
if self.agent_type == "AD":
# The agent name has a prefix that needs to be stripped before matching is possible
agent_name_split = self.name.split("-")
agent_name = '-'.join(agent_name_split[1:])
agent_match = f"{agent_name.upper()}.{self.agent_pool_name.upper()}"
match_with = PropertyMatch(
key="domain", value=f"{self.name}.{self.agent_pool_name}"
key="name", value=agent_match
)
yield Edge(
start=ConditionalEdgePath(
Expand Down
1 change: 1 addition & 0 deletions src/openhound_okta/models/agent_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def as_node(self):
kinds=[nk.AGENT_POOL],
properties=AgentPoolProperties(
tenant=self._lookup.org_id(),
tenant_domain=self._extras["tenant"],
id=self.id,
name=self.name,
displayname=self.name,
Expand Down
6 changes: 2 additions & 4 deletions src/openhound_okta/models/api_service.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from dataclasses import dataclass, field
from dataclasses import dataclass
from datetime import datetime

from openhound.core.asset import BaseAsset, EdgeDef, NodeDef
Expand All @@ -17,9 +17,6 @@ class ApiServiceProperties(OktaNodeProperties):
app_type: str
created_at: datetime
oauth_scopes: list[str] | None = None
collected: bool = field(
default=True, metadata={"description": "Collected/generated by OpenHound"}
)


@app.asset(
Expand Down Expand Up @@ -64,6 +61,7 @@ def as_node(self):
kinds=[nk.INTEGRATION],
properties=ApiServiceProperties(
tenant=self._lookup.org_id(),
tenant_domain=self._extras["tenant"],
id=self.id,
name=self.name,
displayname=self.name,
Expand Down
1 change: 1 addition & 0 deletions src/openhound_okta/models/api_token.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def as_node(self):
kinds=[nk.API_TOKEN],
properties=ApiTokenProperties(
tenant=self._lookup.org_id(),
tenant_domain=self._extras["tenant"],
id=self.id,
name=self.name,
displayname=self.name,
Expand Down
61 changes: 26 additions & 35 deletions src/openhound_okta/models/application.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from dataclasses import dataclass, field
from dataclasses import dataclass
from datetime import datetime
from typing import ClassVar

Expand All @@ -8,8 +8,6 @@
Edge,
EdgePath,
EdgeProperties,
ConditionalEdgePath,
PropertyMatch,
)
from pydantic import BaseModel, ConfigDict, Field

Expand All @@ -28,9 +26,6 @@ class ApplicationProperties(OktaNodeProperties):
last_updated: datetime | None = None
sign_on_mode: str | None = None
orn: str | None = None
collected: bool = field(
default=True, metadata={"description": "Collected/generated by OpenHound"}
)


class JWK(BaseModel):
Expand Down Expand Up @@ -121,6 +116,7 @@ def as_node(self):
kinds=[nk.APPLICATION],
properties=ApplicationProperties(
tenant=self._lookup.org_id(),
tenant_domain=self._extras["tenant"],
id=self.id,
name=self.name,
displayname=self.label or self.name,
Expand All @@ -136,7 +132,8 @@ def as_node(self):

@property
def _outbound_jamf_sso_edge(self):
# TODO: Should this be conditional on sign_on_mode == SAML_2_0?
# SAML == SAML_2_0
# SWA == SECURE_PASSWORD_STORE, BROWSER_PLUGIN or AUTO_LOGIN
if self.name == "jamfsoftwareserver":
jamf_domain = self.settings.app.get("domain")
if jamf_domain:
Expand All @@ -148,38 +145,31 @@ def _outbound_jamf_sso_edge(self):
properties=EdgeProperties(traversable=True),
)

# @property
# def _outbound_jamf_swa_edge(self):
# # TODO: Should this be conditional on sign_on_mode SECURE_PASSWORD_STORE, BROWSER_PLUGIN or AUTO_LOGIN?
# if self.name == "jamfsoftwareserver":
# jamf_domain = self.settings.app.get("domain")
# if jamf_domain:
# jamf_domain = jamf_domain.replace('"', "")
# yield Edge(
# kind=ek.OUTBOUND_ORG_SSO,
# start=EdgePath(value=self.id, match_by="id"),
# end=EdgePath(value=f"{jamf_domain}-SSO", match_by="id"),
# properties=EdgeProperties(traversable=True),
# )

# @property
# def _outbount_github_sso_edge(self):
# TODO: Github matching based on domain only may cause conflicts, should we actually do this before conditional matching?
# TODO: Wait for the Github Enterprise (v.s. org) implementation is finalized
# if self.name == "githubcloud":
# yield Edge(
# kind=ek.OUTBOUND_ORG_SSO,
# start=EdgePath(value=self.id, match_by="id"),
# ....
# properties=EdgeProperties(traversable=True),
# )
#

@property
def _kerberos_sso_edge(self):
# TODO: Check logic
if self.name == "active_directory":
domain = self.label.split(".")[-2]
end_spn = f"HTTP/{domain}.kerberos.okta.com"
condition = PropertyMatch(key="serviceprincipalnames", value=end_spn)
yield Edge(
kind=ek.KERBEROS_SSO,
start=ConditionalEdgePath(kind="User", property_matchers=[condition]),
end=EdgePath(value=self.id, match_by="id"),
)
# @property
# def _kerberos_sso_edge(self):
# # TODO: matching against arrays needs to be supported by the BH API before this will
# # match with nodes
# if self.name == "active_directory":
# domain = self.label.split(".")[-2]
# end_spn = f"HTTP/{domain}.kerberos.okta.com"
# condition = PropertyMatch(key="serviceprincipalnames", value=end_spn)
# yield Edge(
# kind=ek.KERBEROS_SSO,
# start=ConditionalEdgePath(kind="User", property_matchers=[condition]),
# end=EdgePath(value=self.id, match_by="id"),
# )

@property
def _contains_edge(self):
Expand All @@ -192,6 +182,7 @@ def _contains_edge(self):

@property
def edges(self):
    """Yield every edge emitted for this application.

    Edges come from ``_contains_edge`` first, then from
    ``_outbound_jamf_sso_edge``.
    """
    # Disabled until BHE supports array-based matching
    # yield from self._kerberos_sso_edge
    yield from self._contains_edge
    yield from self._outbound_jamf_sso_edge
1 change: 1 addition & 0 deletions src/openhound_okta/models/application_jwks.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def as_node(self):
kinds=[nk.JWK],
properties=JWKProperties(
tenant=self._lookup.org_id(),
tenant_domain=self._extras["tenant"],
name=self.display_name,
displayname=self.display_name,
id=self.id,
Expand Down
4 changes: 2 additions & 2 deletions src/openhound_okta/models/application_secrets.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from dataclasses import dataclass
from dataclasses import field
from datetime import datetime

from openhound.core.asset import BaseAsset, EdgeDef, NodeDef
Expand All @@ -9,8 +10,6 @@
from openhound_okta.kinds import edges as ek, nodes as nk
from openhound_okta.main import app

from dataclasses import field


@dataclass
class SecretProperties(OktaNodeProperties):
Expand Down Expand Up @@ -68,6 +67,7 @@ def as_node(self):
kinds=[nk.CLIENT_SECRET],
properties=SecretProperties(
tenant=self._lookup.org_id(),
tenant_domain=self._extras["tenant"],
name=self.secret_hash,
displayname=self.secret_hash,
id=self.id,
Expand Down
Loading
Loading