diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml index 1c0e89f529f4..85f0b288ccb5 100644 --- a/.github/workflows/metadata-io.yml +++ b/.github/workflows/metadata-io.yml @@ -34,7 +34,7 @@ env: jobs: setup: - runs-on: ${{ vars.DEPOT_PROJECT_ID != '' && 'depot-ubuntu-latest' || 'ubuntu-latest' }} + runs-on: ${{ vars.DEPOT_PROJECT_ID != '' && 'depot-ubuntu-latest-2' || 'ubuntu-latest' }} outputs: frontend_change: ${{ steps.ci-optimize.outputs.frontend-change == 'true' || github.event_name == 'release' }} ingestion_change: ${{ steps.ci-optimize.outputs.ingestion-change == 'true' || github.event_name == 'release' }} diff --git a/.github/workflows/verify-quickstart-compose.yml b/.github/workflows/verify-quickstart-compose.yml index 565e140b48d9..7d6d44ea978f 100644 --- a/.github/workflows/verify-quickstart-compose.yml +++ b/.github/workflows/verify-quickstart-compose.yml @@ -6,6 +6,8 @@ on: branches: - master pull_request: + branches: + - master jobs: verify-quickstart-compose-updated: name: Verify quickstart compose file is up-to-date diff --git a/CLAUDE.md b/CLAUDE.md index 51750f909e9c..521db453edba 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -67,6 +67,15 @@ Each Python module has a gradle setup similar to `metadata-ingestion/` (document - **MCE/MCL**: Metadata Change Events/Logs for updates - **Entity Registry**: YAML config defining entity-aspect relationships (`metadata-models/src/main/resources/entity-registry.yml`) +### Validation Architecture + +**IMPORTANT**: Validation must work across all APIs (GraphQL, OpenAPI, RestLI). 
+ +- **Never add validation in API-specific layers** (GraphQL resolvers, REST controllers) - this only protects one API +- **Always implement AspectPayloadValidators** in `metadata-io/src/main/java/com/linkedin/metadata/aspect/validation/` +- **Register as Spring beans** in `SpringStandardPluginConfiguration.java` +- **Follow existing patterns**: See `SystemPolicyValidator.java` and `PolicyFieldTypeValidator.java` as examples + ## Development Flow 1. **Schema changes** in `metadata-models/` trigger code generation across all languages diff --git a/build.gradle b/build.gradle index 49b790a4809c..c33bcfc515c3 100644 --- a/build.gradle +++ b/build.gradle @@ -216,7 +216,7 @@ project.ext.externalDependency = [ 'log4jApi': "org.apache.logging.log4j:log4j-api:$log4jVersion", 'log4j12Api': "org.slf4j:log4j-over-slf4j:$slf4jVersion", 'log4j2Api': "org.apache.logging.log4j:log4j-to-slf4j:$log4jVersion", - 'lombok': 'org.projectlombok:lombok:1.18.30', + 'lombok': 'org.projectlombok:lombok:1.18.42', 'mariadbConnector': 'org.mariadb.jdbc:mariadb-java-client:2.6.0', 'mavenArtifact': "org.apache.maven:maven-artifact:$mavenVersion", 'mixpanel': 'com.mixpanel:mixpanel-java:1.4.4', @@ -257,6 +257,7 @@ project.ext.externalDependency = [ 'postgresql': 'org.postgresql:postgresql:42.7.8', 'protobuf': 'com.google.protobuf:protobuf-java:4.32.0', 'grpcProtobuf': 'io.grpc:grpc-protobuf:1.75.0', + 'grpcNettyShaded': 'io.grpc:grpc-netty-shaded:1.75.0', 'rangerCommons': 'org.apache.ranger:ranger-plugins-common:2.3.0', 'reflections': 'org.reflections:reflections:0.9.12', 'resilience4j': 'io.github.resilience4j:resilience4j-retry:1.7.1', diff --git a/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java b/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java index bb0f9897f76b..2f23c7eec837 100644 --- a/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java +++ b/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java @@ -3,7 +3,6 @@ import static auth.AuthUtils.*; import static 
com.linkedin.metadata.Constants.CORP_USER_ENTITY_NAME; import static com.linkedin.metadata.Constants.GROUP_MEMBERSHIP_ASPECT_NAME; -import static org.pac4j.play.store.PlayCookieSessionStore.*; import static play.mvc.Results.internalServerError; import static utils.FrontendConstants.SSO_LOGIN; @@ -50,7 +49,6 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; -import java.util.Base64; import java.util.Collection; import java.util.Collections; import java.util.List; @@ -71,6 +69,7 @@ import org.pac4j.core.context.Cookie; import org.pac4j.core.context.FrameworkParameters; import org.pac4j.core.context.WebContext; +import org.pac4j.core.context.session.SessionStore; import org.pac4j.core.credentials.Credentials; import org.pac4j.core.engine.DefaultCallbackLogic; import org.pac4j.core.exception.http.HttpAction; @@ -80,10 +79,10 @@ import org.pac4j.core.profile.UserProfile; import org.pac4j.core.util.CommonHelper; import org.pac4j.core.util.Pac4jConstants; -import org.pac4j.play.store.PlayCookieSessionStore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import play.mvc.Result; +import utils.SerializationUtils; /** * This class contains the logic that is executed when an OpenID Connect Identity Provider redirects @@ -207,21 +206,15 @@ private Pair superPerform( private void setContextRedirectUrl(CallContext ctx) { WebContext context = ctx.webContext(); - PlayCookieSessionStore sessionStore = (PlayCookieSessionStore) ctx.sessionStore(); - - Optional redirectUrl = - context.getRequestCookies().stream() - .filter(cookie -> REDIRECT_URL_COOKIE_NAME.equals(cookie.getName())) - .findFirst(); - redirectUrl.ifPresent( - cookie -> - sessionStore.set( - context, - Pac4jConstants.REQUESTED_URL, - sessionStore - .getSerializer() - .deserializeFromBytes( - uncompressBytes(Base64.getDecoder().decode(cookie.getValue()))))); + SessionStore sessionStore = ctx.sessionStore(); + + context.getRequestCookies().stream() + .filter(cookie 
-> REDIRECT_URL_COOKIE_NAME.equals(cookie.getName())) + .map(Cookie::getValue) + .map(SerializationUtils::deserializeFoundAction) + .findFirst() + .ifPresent( + foundAction -> sessionStore.set(context, Pac4jConstants.REQUESTED_URL, foundAction)); } private Result handleOidcCallback( diff --git a/datahub-frontend/app/controllers/AuthenticationController.java b/datahub-frontend/app/controllers/AuthenticationController.java index 540497790ab6..0f606ff8c26a 100644 --- a/datahub-frontend/app/controllers/AuthenticationController.java +++ b/datahub-frontend/app/controllers/AuthenticationController.java @@ -2,7 +2,6 @@ import static auth.AuthUtils.*; import static org.pac4j.core.client.IndirectClient.ATTEMPTED_AUTHENTICATION_SUFFIX; -import static org.pac4j.play.store.PlayCookieSessionStore.*; import static utils.FrontendConstants.FALLBACK_LOGIN; import static utils.FrontendConstants.GUEST_LOGIN; import static utils.FrontendConstants.PASSWORD_LOGIN; @@ -28,7 +27,6 @@ import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.time.Duration; -import java.util.Base64; import java.util.Optional; import javax.annotation.Nonnull; import javax.inject.Inject; @@ -37,11 +35,11 @@ import org.pac4j.core.client.Client; import org.pac4j.core.context.CallContext; import org.pac4j.core.context.WebContext; +import org.pac4j.core.context.session.SessionStore; import org.pac4j.core.exception.http.FoundAction; import org.pac4j.core.exception.http.RedirectionAction; import org.pac4j.play.PlayWebContext; import org.pac4j.play.http.PlayHttpActionAdapter; -import org.pac4j.play.store.PlayCookieSessionStore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import play.data.validation.Constraints; @@ -51,6 +49,7 @@ import play.mvc.Result; import play.mvc.Results; import security.AuthenticationManager; +import utils.SerializationUtils; public class AuthenticationController extends Controller { public static final String AUTH_VERBOSE_LOGGING = "auth.verbose.logging"; 
@@ -75,7 +74,7 @@ public class AuthenticationController extends Controller { @Inject private org.pac4j.core.config.Config ssoConfig; - @VisibleForTesting @Inject protected PlayCookieSessionStore playCookieSessionStore; + @VisibleForTesting @Inject protected SessionStore sessionStore; @VisibleForTesting @Inject protected SsoManager ssoManager; @@ -371,11 +370,9 @@ protected Result addRedirectCookie(Result result, CallContext ctx, String redire // to reduce size of the session cookie FoundAction foundAction = new FoundAction(BasePathUtils.addBasePath(redirectPath, this.basePath)); - byte[] javaSerBytes = - ((PlayCookieSessionStore) ctx.sessionStore()).getSerializer().serializeToBytes(foundAction); - String serialized = Base64.getEncoder().encodeToString(compressBytes(javaSerBytes)); Http.CookieBuilder redirectCookieBuilder = - Http.Cookie.builder(REDIRECT_URL_COOKIE_NAME, serialized); + Http.Cookie.builder( + REDIRECT_URL_COOKIE_NAME, SerializationUtils.serializeFoundAction(foundAction)); redirectCookieBuilder.withPath(BasePathUtils.addBasePath("/", this.basePath)); redirectCookieBuilder.withSecure(true); redirectCookieBuilder.withHttpOnly(true); @@ -422,7 +419,7 @@ private CallContext buildCallContext(Http.RequestHeader request) { PlayWebContext webContext = new PlayWebContext(request); // Then create CallContext using the web context and session store - return new CallContext(webContext, playCookieSessionStore); + return new CallContext(webContext, sessionStore); } private void configurePac4jSessionStore(CallContext ctx, Client client) { @@ -431,11 +428,11 @@ private void configurePac4jSessionStore(CallContext ctx, Client client) { // This is to prevent previous login attempts from being cached. // We replicate the logic here, which is buried in the Pac4j client. 
Optional attempt = - playCookieSessionStore.get(context, client.getName() + ATTEMPTED_AUTHENTICATION_SUFFIX); + ctx.sessionStore().get(context, client.getName() + ATTEMPTED_AUTHENTICATION_SUFFIX); if (attempt.isPresent() && !"".equals(attempt.get())) { logger.debug( "Found previous login attempt. Removing it manually to prevent unexpected errors."); - playCookieSessionStore.set(context, client.getName() + ATTEMPTED_AUTHENTICATION_SUFFIX, ""); + ctx.sessionStore().set(context, client.getName() + ATTEMPTED_AUTHENTICATION_SUFFIX, ""); } } diff --git a/datahub-frontend/app/utils/SerializationUtils.java b/datahub-frontend/app/utils/SerializationUtils.java new file mode 100644 index 000000000000..98fb7d41472f --- /dev/null +++ b/datahub-frontend/app/utils/SerializationUtils.java @@ -0,0 +1,27 @@ +package utils; + +import static org.pac4j.play.store.PlayCookieSessionStore.compressBytes; +import static org.pac4j.play.store.PlayCookieSessionStore.uncompressBytes; + +import java.util.Base64; +import javax.annotation.Nonnull; +import org.pac4j.core.exception.http.FoundAction; +import org.pac4j.core.util.serializer.JavaSerializer; + +public class SerializationUtils { + + private static final JavaSerializer JAVA_SERIALIZER = new JavaSerializer(); + + private SerializationUtils() {} + + public static String serializeFoundAction(@Nonnull final FoundAction foundAction) { + byte[] javaSerBytes = JAVA_SERIALIZER.serializeToBytes(foundAction); + return Base64.getEncoder().encodeToString(compressBytes(javaSerBytes)); + } + + public static FoundAction deserializeFoundAction(@Nonnull final String serialized) { + return (FoundAction) + JAVA_SERIALIZER.deserializeFromBytes( + uncompressBytes(Base64.getDecoder().decode(serialized))); + } +} diff --git a/datahub-frontend/test/controllers/AuthenticationControllerTest.java b/datahub-frontend/test/controllers/AuthenticationControllerTest.java index ff6e5eef110c..36b05578fab9 100644 --- 
a/datahub-frontend/test/controllers/AuthenticationControllerTest.java +++ b/datahub-frontend/test/controllers/AuthenticationControllerTest.java @@ -100,7 +100,7 @@ public void setUp() { // Create the controller controller = new AuthenticationController(mockConfig); - controller.playCookieSessionStore = playCookieSessionStore; + controller.sessionStore = playCookieSessionStore; controller.ssoManager = ssoManager; controller.authClient = authClient; } @@ -353,7 +353,7 @@ public void testAuthenticateWithBasePathRedirect() { AuthenticationController testController = new AuthenticationController(config); testController.ssoManager = ssoManager; testController.authClient = authClient; - testController.playCookieSessionStore = playCookieSessionStore; + testController.sessionStore = playCookieSessionStore; // Configure SSO to be enabled when(ssoManager.isSsoEnabled()).thenReturn(true); @@ -408,7 +408,7 @@ public void testAuthenticateWithLogOutRedirect() { AuthenticationController testController = new AuthenticationController(config); testController.ssoManager = ssoManager; testController.authClient = authClient; - testController.playCookieSessionStore = playCookieSessionStore; + testController.sessionStore = playCookieSessionStore; // Configure SSO to be enabled when(ssoManager.isSsoEnabled()).thenReturn(true); @@ -465,7 +465,7 @@ public void testSsoWithBasePath() { Config config = ConfigFactory.parseMap(configMap); AuthenticationController testController = new AuthenticationController(config); testController.ssoManager = ssoManager; - testController.playCookieSessionStore = playCookieSessionStore; + testController.sessionStore = playCookieSessionStore; // Configure SSO to be enabled when(ssoManager.isSsoEnabled()).thenReturn(true); @@ -619,7 +619,7 @@ public void testRedirectToIdentityProviderWithBasePath() { Config config = ConfigFactory.parseMap(configMap); AuthenticationController testController = new AuthenticationController(config); testController.ssoManager = 
ssoManager; - testController.playCookieSessionStore = playCookieSessionStore; + testController.sessionStore = playCookieSessionStore; // Configure SSO to be enabled when(ssoManager.isSsoEnabled()).thenReturn(true); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpsertLinkResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpsertLinkResolver.java index 29532142ebf2..8e0f6a00a1cc 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpsertLinkResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpsertLinkResolver.java @@ -59,7 +59,6 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw actor, settingsInput, _entityService); - log.warn(">>> NO ERROR RAISED"); return true; } catch (Exception e) { log.error( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/CreateInviteTokenResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/CreateInviteTokenResolver.java index 934a9d66fe20..82264894dbd1 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/CreateInviteTokenResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/CreateInviteTokenResolver.java @@ -25,7 +25,7 @@ public class CreateInviteTokenResolver implements DataFetcher get(final DataFetchingEnvironment environment) throws Exception { final QueryContext context = environment.getContext(); - if (!canManagePolicies(context)) { + if (!canManageUserCredentials(context)) { throw new AuthorizationException( "Unauthorized to create invite tokens. 
Please contact your DataHub administrator if this needs corrective action."); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/GetInviteTokenResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/GetInviteTokenResolver.java index 20477e756991..c729ea76f37f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/GetInviteTokenResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/GetInviteTokenResolver.java @@ -25,7 +25,7 @@ public class GetInviteTokenResolver implements DataFetcher get(final DataFetchingEnvironment environment) throws Exception { final QueryContext context = environment.getContext(); - if (!canManagePolicies(context)) { + if (!canManageUserCredentials(context)) { throw new AuthorizationException( "Unauthorized to get invite tokens. Please contact your DataHub administrator if this needs corrective action."); } diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 33bdd100a44d..c7b53c470ff0 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -9278,14 +9278,14 @@ input UpsertLinkInput { label: String! """ - Optional settings input for this link + The urn of the resource or entity to attach the link to, for example a dataset urn """ - settings: LinkSettingsInput + resourceUrn: String! """ - The urn of the resource or entity to attach the link to, for example a dataset urn + Optional settings input for this link """ - resourceUrn: String! 
+ settings: LinkSettingsInput } """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/link/UpsertLinkResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/link/UpsertLinkResolverTest.java index fddc698e7259..a22dcc745148 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/link/UpsertLinkResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/link/UpsertLinkResolverTest.java @@ -49,8 +49,8 @@ public void testShouldCreateLinkWhenEntityHasNoLinks() throws Exception { new UpsertLinkInput( "https://original-url.com", "Original label", - new LinkSettingsInput(false), - ASSET_URN)); + ASSET_URN, + new LinkSettingsInput(false))); UpsertLinkResolver resolver = new UpsertLinkResolver(mockService, mockClient); resolver.get(mockEnv).get(); @@ -77,7 +77,7 @@ public void testShouldCreateLinkWhenUpsertLinkWithTheSameUrlAndDifferentLabel() DataFetchingEnvironment mockEnv = initMockEnv( new UpsertLinkInput( - "https://original-url.com", "New label", new LinkSettingsInput(false), ASSET_URN)); + "https://original-url.com", "New label", ASSET_URN, new LinkSettingsInput(false))); UpsertLinkResolver resolver = new UpsertLinkResolver(mockService, mockClient); resolver.get(mockEnv).get(); @@ -104,7 +104,7 @@ public void testShouldCreateLinkWhenUpsertLinkWithDifferentUrlAndTheSameLabel() DataFetchingEnvironment mockEnv = initMockEnv( new UpsertLinkInput( - "https://new-url.com", "Original label", new LinkSettingsInput(false), ASSET_URN)); + "https://new-url.com", "Original label", ASSET_URN, new LinkSettingsInput(false))); UpsertLinkResolver resolver = new UpsertLinkResolver(mockService, mockClient); resolver.get(mockEnv).get(); @@ -133,8 +133,8 @@ public void testShouldUpdateLinkWhenUpsertLinkWithTheSameUrlAndLabel() throws Ex new UpsertLinkInput( "https://original-url.com", "Original label", - new LinkSettingsInput(false), - 
ASSET_URN)); + ASSET_URN, + new LinkSettingsInput(false))); UpsertLinkResolver resolver = new UpsertLinkResolver(mockService, mockClient); resolver.get(mockEnv).get(); @@ -151,8 +151,8 @@ public void testGetFailureNoEntity() throws Exception { new UpsertLinkInput( "https://original-url.com", "Original label", - new LinkSettingsInput(false), - ASSET_URN)); + ASSET_URN, + new LinkSettingsInput(false))); UpsertLinkResolver resolver = new UpsertLinkResolver(mockService, mockClient); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); @@ -172,8 +172,8 @@ public void testGetFailureNoPermission() throws Exception { new UpsertLinkInput( "https://original-url.com", "Original label", - new LinkSettingsInput(false), - ASSET_URN)); + ASSET_URN, + new LinkSettingsInput(false))); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); UpsertLinkResolver resolver = new UpsertLinkResolver(mockService, mockClient); @@ -208,8 +208,8 @@ public void testUpsertLinkFailure() throws Exception { new UpsertLinkInput( "https://original-url.com", "Original label", - new LinkSettingsInput(false), - ASSET_URN)); + ASSET_URN, + new LinkSettingsInput(false))); UpsertLinkResolver resolver = new UpsertLinkResolver(mockService, mockClient); assertThrows( diff --git a/datahub-upgrade/build.gradle b/datahub-upgrade/build.gradle index 03c19e9eaeac..1ad8d41a1e7f 100644 --- a/datahub-upgrade/build.gradle +++ b/datahub-upgrade/build.gradle @@ -22,6 +22,7 @@ dependencies { implementation externalDependency.mustache implementation externalDependency.javaxInject + implementation externalDependency.springActuator implementation(externalDependency.hadoopClient) { exclude group: 'net.minidev', module: 'json-smart' exclude group: 'com.nimbusds', module: 'nimbus-jose-jwt' @@ -229,6 +230,84 @@ task runRestoreIndicesUrn(type: Exec) { bootJar.getArchiveFile().get(), "-u", "RestoreIndices", "-a", "batchSize=100", "-a", "urnBasedPagination=true" } +/** + * Runs LoadIndices on locally 
running system. This ensures indices have the correct mappings + * and then loads data from database to Elasticsearch. + * + * The process includes two steps: + * 1. BuildIndicesStep: Ensures indices have the correct mappings (creates/updates index structure) + * 2. LoadIndicesStep: Loads data from database to Elasticsearch + * + * The task automatically configures ES_BULK_REQUESTS_LIMIT to match the batch size + * and sets ES_BULK_FLUSH_PERIOD to 300 (5 minutes) to allow proper batching. + * + * Optional parameters: + * - limit: Maximum number of records to process (default: no limit) + * - batchSize: Number of records per batch (default: 10000) + * - esThreadCount: Elasticsearch I/O thread count (default: 4, enables async bulk processing) + * - urnLike: URN pattern filter (e.g., "urn:li:dataset:%") + * - aspectNames: Comma-separated aspect names to filter + * - lePitEpochMs: Process records created before this timestamp + * - gePitEpochMs: Process records created after this timestamp + * + * Usage examples: + * ./gradlew runLoadIndices + * ./gradlew runLoadIndices -Plimit=5000 + * ./gradlew runLoadIndices -Plimit=1000 -PbatchSize=2500 + * ./gradlew runLoadIndices -PurnLike="urn:li:dataset:%" + * ./gradlew runLoadIndices -PesThreadCount=3 + */ +task runLoadIndices(type: Exec) { + dependsOn bootJar + group = "Execution" + description = "Run the load indices process locally - ensures correct mappings and loads data from database to Elasticsearch." + environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml" + + def args = ["java", "-agentlib:jdwp=transport=dt_socket,address=5003,server=y,suspend=n", + "-jar", "-Dserver.port=8083", bootJar.getArchiveFile().get(), "-u", "LoadIndices"] + + // Add batchSize (default: 10000) + def batchSize = project.hasProperty('batchSize') ? 
project.getProperty('batchSize') : '10000' + args.addAll(["-a", "batchSize=${batchSize}"]) + + // Optimize bulk request limit for sustained load + environment "ES_BULK_REQUESTS_LIMIT", batchSize + // Set flush period to 5 minutes to allow proper batching + environment "ES_BULK_FLUSH_PERIOD", "300" + + // Configure Elasticsearch I/O threads for better BulkProcessor performance + // Increase from default 2 to allow more concurrent HTTP connections + def threadCount = project.hasProperty('esThreadCount') ? project.getProperty('esThreadCount') : '4' + environment "ELASTICSEARCH_THREAD_COUNT", threadCount + + // Add limit if specified + if (project.hasProperty('limit')) { + args.addAll(["-a", "limit=${project.getProperty('limit')}"]) + } + + // Add urnLike if specified + if (project.hasProperty('urnLike')) { + args.addAll(["-a", "urnLike=${project.getProperty('urnLike')}"]) + } + + // Add aspectNames if specified + if (project.hasProperty('aspectNames')) { + args.addAll(["-a", "aspectNames=${project.getProperty('aspectNames')}"]) + } + + // Add lePitEpochMs if specified + if (project.hasProperty('lePitEpochMs')) { + args.addAll(["-a", "lePitEpochMs=${project.getProperty('lePitEpochMs')}"]) + } + + // Add gePitEpochMs if specified + if (project.hasProperty('gePitEpochMs')) { + args.addAll(["-a", "gePitEpochMs=${project.getProperty('gePitEpochMs')}"]) + } + + commandLine args +} + docker { dependsOn(bootJar) name "${docker_registry}/${docker_repo}:${versionTag}" diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCli.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCli.java index b3f1a8750bef..fe0996d6fa79 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCli.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCli.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.upgrade; import com.linkedin.datahub.upgrade.impl.DefaultUpgradeManager; +import 
com.linkedin.datahub.upgrade.loadindices.LoadIndices; import com.linkedin.datahub.upgrade.removeunknownaspects.RemoveUnknownAspects; import com.linkedin.datahub.upgrade.restorebackup.RestoreBackup; import com.linkedin.datahub.upgrade.restoreindices.RestoreIndices; @@ -12,7 +13,6 @@ import com.linkedin.upgrade.DataHubUpgradeState; import io.datahubproject.metadata.context.OperationContext; import java.util.List; -import javax.inject.Inject; import javax.inject.Named; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; @@ -34,15 +34,19 @@ private static final class Args { private final UpgradeManager _upgradeManager = new DefaultUpgradeManager(); - @Inject + @Autowired(required = false) + @Named("loadIndices") + private LoadIndices loadIndices; + + @Autowired(required = false) @Named("restoreIndices") private RestoreIndices restoreIndices; - @Inject + @Autowired(required = false) @Named("restoreBackup") private RestoreBackup restoreBackup; - @Inject + @Autowired(required = false) @Named("removeUnknownAspects") private RemoveUnknownAspects removeUnknownAspects; @@ -66,29 +70,65 @@ private static final class Args { @Named("systemUpdateCron") private SystemUpdateCron systemUpdateCron; - @Autowired + @Autowired(required = false) @Named("reindexDebug") private ReindexDebug reindexDebug; @Override public void run(String... 
cmdLineArgs) { - _upgradeManager.register(restoreIndices); - _upgradeManager.register(restoreBackup); - _upgradeManager.register(removeUnknownAspects); + // Register upgrades with null checks and warnings + if (restoreIndices != null) { + _upgradeManager.register(restoreIndices); + } else { + log.warn("RestoreIndices upgrade not available - bean not found"); + } + + if (restoreBackup != null) { + _upgradeManager.register(restoreBackup); + } else { + log.warn("RestoreBackup upgrade not available - bean not found"); + } + + if (removeUnknownAspects != null) { + _upgradeManager.register(removeUnknownAspects); + } else { + log.warn("RemoveUnknownAspects upgrade not available - bean not found"); + } + + if (loadIndices != null) { + _upgradeManager.register(loadIndices); + } else { + log.warn("LoadIndices upgrade not available - bean not found"); + } + if (systemUpdate != null) { _upgradeManager.register(systemUpdate); + } else { + log.warn("SystemUpdate upgrade not available - bean not found"); } + if (systemUpdateBlocking != null) { _upgradeManager.register(systemUpdateBlocking); + } else { + log.warn("SystemUpdateBlocking upgrade not available - bean not found"); } + if (systemUpdateNonBlocking != null) { _upgradeManager.register(systemUpdateNonBlocking); + } else { + log.warn("SystemUpdateNonBlocking upgrade not available - bean not found"); } + if (systemUpdateCron != null) { _upgradeManager.register(systemUpdateCron); + } else { + log.warn("SystemUpdateCron upgrade not available - bean not found"); } + if (reindexDebug != null) { _upgradeManager.register(reindexDebug); + } else { + log.warn("ReindexDebug upgrade not available - bean not found"); } final Args args = new Args(); diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCliApplication.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCliApplication.java index 1c594f79773d..3e0550e2c881 100644 --- 
a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCliApplication.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCliApplication.java @@ -1,47 +1,13 @@ package com.linkedin.datahub.upgrade; -import com.linkedin.gms.factory.auth.AuthorizerChainFactory; -import com.linkedin.gms.factory.auth.DataHubAuthorizerFactory; -import com.linkedin.gms.factory.event.ExternalEventsServiceFactory; -import com.linkedin.gms.factory.event.KafkaConsumerPoolFactory; -import com.linkedin.gms.factory.graphql.GraphQLEngineFactory; -import com.linkedin.gms.factory.kafka.KafkaEventConsumerFactory; -import com.linkedin.gms.factory.kafka.SimpleKafkaConsumerFactory; -import com.linkedin.gms.factory.kafka.trace.KafkaTraceReaderFactory; -import com.linkedin.gms.factory.telemetry.ScheduledAnalyticsFactory; -import com.linkedin.gms.factory.trace.TraceServiceFactory; import org.springframework.boot.WebApplicationType; -import org.springframework.boot.autoconfigure.SpringBootApplication; -import org.springframework.boot.autoconfigure.elasticsearch.ElasticsearchRestClientAutoConfiguration; import org.springframework.boot.builder.SpringApplicationBuilder; -import org.springframework.context.annotation.ComponentScan; -import org.springframework.context.annotation.FilterType; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; @SuppressWarnings("checkstyle:HideUtilityClassConstructor") -@SpringBootApplication(exclude = {ElasticsearchRestClientAutoConfiguration.class}) -@ComponentScan( - basePackages = { - "com.linkedin.gms.factory", - "com.linkedin.datahub.upgrade.config", - "com.linkedin.datahub.upgrade.system.cdc", - "com.linkedin.metadata.dao.producer" - }, - excludeFilters = { - @ComponentScan.Filter( - type = FilterType.ASSIGNABLE_TYPE, - classes = { - ScheduledAnalyticsFactory.class, - AuthorizerChainFactory.class, - DataHubAuthorizerFactory.class, - SimpleKafkaConsumerFactory.class, - 
KafkaEventConsumerFactory.class, - GraphQLEngineFactory.class, - KafkaTraceReaderFactory.class, - TraceServiceFactory.class, - KafkaConsumerPoolFactory.class, - ExternalEventsServiceFactory.class - }) - }) +@Configuration +@Import(UpgradeConfigurationSelector.class) public class UpgradeCliApplication { public static void main(String[] args) { new SpringApplicationBuilder(UpgradeCliApplication.class, UpgradeCli.class) diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeConfigurationSelector.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeConfigurationSelector.java new file mode 100644 index 000000000000..0fd5e4344676 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeConfigurationSelector.java @@ -0,0 +1,29 @@ +package com.linkedin.datahub.upgrade; + +import com.linkedin.datahub.upgrade.conditions.GeneralUpgradeCondition; +import com.linkedin.datahub.upgrade.conditions.LoadIndicesCondition; +import com.linkedin.datahub.upgrade.config.GeneralUpgradeConfiguration; +import com.linkedin.datahub.upgrade.loadindices.LoadIndicesUpgradeConfig; +import org.springframework.context.annotation.Conditional; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; + +/** + * Configuration selector that chooses the appropriate upgrade configuration based on command-line + * arguments. 
+ */ +@Configuration +public class UpgradeConfigurationSelector { + + /** Configuration for LoadIndices upgrade - excludes Kafka components */ + @Configuration + @Conditional(LoadIndicesCondition.class) + @Import(LoadIndicesUpgradeConfig.class) + public static class LoadIndicesConfiguration {} + + /** Configuration for general upgrades - includes all components */ + @Configuration + @Conditional(GeneralUpgradeCondition.class) + @Import(GeneralUpgradeConfiguration.class) + public static class GeneralConfiguration {} +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/conditions/GeneralUpgradeCondition.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/conditions/GeneralUpgradeCondition.java new file mode 100644 index 000000000000..769033ea0093 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/conditions/GeneralUpgradeCondition.java @@ -0,0 +1,21 @@ +package com.linkedin.datahub.upgrade.conditions; + +import java.util.Objects; +import java.util.Set; +import org.springframework.boot.ApplicationArguments; +import org.springframework.context.annotation.Condition; +import org.springframework.context.annotation.ConditionContext; +import org.springframework.core.type.AnnotatedTypeMetadata; + +public class GeneralUpgradeCondition implements Condition { + public static final String LOAD_INDICES_ARG = "LoadIndices"; + public static final Set EXCLUDED_ARGS = Set.of(LOAD_INDICES_ARG); + + @Override + public boolean matches(ConditionContext context, AnnotatedTypeMetadata metadata) { + // This condition matches when LoadIndices is NOT in the arguments + return !context.getBeanFactory().getBean(ApplicationArguments.class).getNonOptionArgs().stream() + .filter(Objects::nonNull) + .anyMatch(EXCLUDED_ARGS::contains); + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/conditions/LoadIndicesCondition.java 
b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/conditions/LoadIndicesCondition.java new file mode 100644 index 000000000000..b3b941d72f89 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/conditions/LoadIndicesCondition.java @@ -0,0 +1,24 @@ +package com.linkedin.datahub.upgrade.conditions; + +import java.util.List; +import java.util.Objects; +import java.util.Set; +import org.springframework.boot.ApplicationArguments; +import org.springframework.context.annotation.Condition; +import org.springframework.context.annotation.ConditionContext; +import org.springframework.core.type.AnnotatedTypeMetadata; + +public class LoadIndicesCondition implements Condition { + public static final String LOAD_INDICES_ARG = "LoadIndices"; + public static final Set LOAD_INDICES_ARGS = Set.of(LOAD_INDICES_ARG); + + @Override + public boolean matches(ConditionContext context, AnnotatedTypeMetadata metadata) { + List nonOptionArgs = + context.getBeanFactory().getBean(ApplicationArguments.class).getNonOptionArgs(); + if (nonOptionArgs == null) { + return false; + } + return nonOptionArgs.stream().filter(Objects::nonNull).anyMatch(LOAD_INDICES_ARGS::contains); + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDebugCondition.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/conditions/ReindexDebugCondition.java similarity index 93% rename from datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDebugCondition.java rename to datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/conditions/ReindexDebugCondition.java index 1220a50cec6c..9c5c6d01a2a5 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDebugCondition.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/conditions/ReindexDebugCondition.java @@ -1,4 +1,4 @@ -package com.linkedin.datahub.upgrade.config; +package com.linkedin.datahub.upgrade.conditions; 
import java.util.Objects; import org.springframework.boot.ApplicationArguments; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateCondition.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/conditions/SystemUpdateCondition.java similarity index 97% rename from datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateCondition.java rename to datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/conditions/SystemUpdateCondition.java index 0d65af742a59..bce793c9d14a 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateCondition.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/conditions/SystemUpdateCondition.java @@ -1,4 +1,4 @@ -package com.linkedin.datahub.upgrade.config; +package com.linkedin.datahub.upgrade.conditions; import java.util.Objects; import java.util.Set; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateCronCondition.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/conditions/SystemUpdateCronCondition.java similarity index 93% rename from datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateCronCondition.java rename to datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/conditions/SystemUpdateCronCondition.java index 98b52c1366ae..dd84151e3580 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateCronCondition.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/conditions/SystemUpdateCronCondition.java @@ -1,4 +1,4 @@ -package com.linkedin.datahub.upgrade.config; +package com.linkedin.datahub.upgrade.conditions; import java.util.Objects; import org.springframework.boot.ApplicationArguments; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BackfillDatasetLineageIndexFieldsConfig.java 
b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BackfillDatasetLineageIndexFieldsConfig.java index d8c6534470ea..7a3dfdfd4dd9 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BackfillDatasetLineageIndexFieldsConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BackfillDatasetLineageIndexFieldsConfig.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.upgrade.config; +import com.linkedin.datahub.upgrade.conditions.SystemUpdateCondition; import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; import com.linkedin.datahub.upgrade.system.lineage.BackfillDatasetLineageIndexFields; import com.linkedin.metadata.entity.EntityService; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BuildIndicesConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BuildIndicesConfig.java index 315adb7e84fc..14f71f087c74 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BuildIndicesConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BuildIndicesConfig.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.upgrade.config; +import com.linkedin.datahub.upgrade.conditions.SystemUpdateCondition; import com.linkedin.datahub.upgrade.system.BlockingSystemUpgrade; import com.linkedin.datahub.upgrade.system.elasticsearch.BuildIndices; import com.linkedin.gms.factory.config.ConfigurationProvider; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/CDCSetupConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/CDCSetupConfig.java index 0e319479d82f..1c2ed8ca61ee 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/CDCSetupConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/CDCSetupConfig.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.upgrade.config; +import 
com.linkedin.datahub.upgrade.conditions.SystemUpdateCondition; import com.linkedin.datahub.upgrade.system.BlockingSystemUpgrade; import com.linkedin.datahub.upgrade.system.cdc.CDCSourceSetup; import java.util.List; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/CleanIndicesConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/CleanIndicesConfig.java index 7559aaf3f3cd..3103ed674d53 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/CleanIndicesConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/CleanIndicesConfig.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.upgrade.config; +import com.linkedin.datahub.upgrade.conditions.SystemUpdateCondition; import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; import com.linkedin.datahub.upgrade.system.elasticsearch.CleanIndices; import com.linkedin.gms.factory.config.ConfigurationProvider; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/GeneralUpgradeConfiguration.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/GeneralUpgradeConfiguration.java new file mode 100644 index 000000000000..e53cd33961c0 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/GeneralUpgradeConfiguration.java @@ -0,0 +1,47 @@ +package com.linkedin.datahub.upgrade.config; + +import com.linkedin.gms.factory.auth.AuthorizerChainFactory; +import com.linkedin.gms.factory.auth.DataHubAuthorizerFactory; +import com.linkedin.gms.factory.event.ExternalEventsServiceFactory; +import com.linkedin.gms.factory.event.KafkaConsumerPoolFactory; +import com.linkedin.gms.factory.graphql.GraphQLEngineFactory; +import com.linkedin.gms.factory.kafka.KafkaEventConsumerFactory; +import com.linkedin.gms.factory.kafka.SimpleKafkaConsumerFactory; +import com.linkedin.gms.factory.kafka.trace.KafkaTraceReaderFactory; +import 
com.linkedin.gms.factory.telemetry.ScheduledAnalyticsFactory; +import com.linkedin.gms.factory.trace.TraceServiceFactory; +import org.springframework.boot.autoconfigure.EnableAutoConfiguration; +import org.springframework.context.annotation.ComponentScan; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.FilterType; + +/** + * Spring configuration for every upgrade other than LoadIndices: enables auto-configuration and + * component-scans the base packages below, minus the factories named in the exclude filter. + */ +@Configuration +@EnableAutoConfiguration +@ComponentScan( + basePackages = { + "com.linkedin.gms.factory", + "com.linkedin.datahub.upgrade.config", + "com.linkedin.datahub.upgrade.system.cdc", + "com.linkedin.metadata.dao.producer" + }, + excludeFilters = { + @ComponentScan.Filter( + type = FilterType.ASSIGNABLE_TYPE, + classes = { + ScheduledAnalyticsFactory.class, + AuthorizerChainFactory.class, + DataHubAuthorizerFactory.class, + SimpleKafkaConsumerFactory.class, + KafkaEventConsumerFactory.class, + GraphQLEngineFactory.class, + KafkaTraceReaderFactory.class, + TraceServiceFactory.class, + KafkaConsumerPoolFactory.class, + ExternalEventsServiceFactory.class + }) + }) +public class GeneralUpgradeConfiguration {} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/KafkaSetupConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/KafkaSetupConfig.java index 49b02de2cb37..7baa13adc97d 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/KafkaSetupConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/KafkaSetupConfig.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.upgrade.config; +import com.linkedin.datahub.upgrade.conditions.SystemUpdateCondition; import com.linkedin.datahub.upgrade.system.BlockingSystemUpgrade; import com.linkedin.datahub.upgrade.system.kafka.KafkaSetup; import com.linkedin.gms.factory.config.ConfigurationProvider;
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NonBlockingConfigs.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NonBlockingConfigs.java index 2f016e5ce623..36bf4ef72ade 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NonBlockingConfigs.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NonBlockingConfigs.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.upgrade.config; +import com.linkedin.datahub.upgrade.conditions.SystemUpdateCondition; import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; import com.linkedin.datahub.upgrade.system.browsepaths.BackfillBrowsePathsV2; import com.linkedin.datahub.upgrade.system.browsepaths.BackfillIcebergBrowsePathsV2; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/OpenTelemetryConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/OpenTelemetryConfig.java index f43c01af600a..2f37a0b0297f 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/OpenTelemetryConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/OpenTelemetryConfig.java @@ -4,7 +4,10 @@ import com.linkedin.gms.factory.system_telemetry.OpenTelemetryBaseFactory; import com.linkedin.metadata.utils.metrics.MetricUtils; import io.datahubproject.metadata.context.SystemTelemetryContext; +import javax.annotation.Nullable; import org.apache.kafka.clients.producer.Producer; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @@ -21,7 +24,8 @@ protected String getApplicationComponent() { protected SystemTelemetryContext traceContext( MetricUtils metricUtils, ConfigurationProvider configurationProvider, - Producer dueProducer) { + @Autowired(required = false) 
@Qualifier("dataHubUsageProducer") @Nullable + Producer dueProducer) { return super.traceContext(metricUtils, configurationProvider, dueProducer); } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemCronConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemCronConfig.java index 848c06a1a3bd..7d9a8a58f556 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemCronConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemCronConfig.java @@ -2,6 +2,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.upgrade.UpgradeStep; +import com.linkedin.datahub.upgrade.conditions.SystemUpdateCronCondition; import com.linkedin.datahub.upgrade.system.cron.SystemUpdateCron; import com.linkedin.datahub.upgrade.system.cron.steps.TweakReplicasStep; import com.linkedin.metadata.shared.ElasticSearchIndexed; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java index 2c2630e2605e..89ae6bb5548f 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.upgrade.config; import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.upgrade.conditions.SystemUpdateCondition; import com.linkedin.datahub.upgrade.system.BlockingSystemUpgrade; import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; import com.linkedin.datahub.upgrade.system.SystemUpdate; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/PropertyDefinitionsConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/PropertyDefinitionsConfig.java 
index 49cd0beb98ea..1dff60af9f52 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/PropertyDefinitionsConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/PropertyDefinitionsConfig.java @@ -1,6 +1,6 @@ package com.linkedin.datahub.upgrade.config.restoreindices; -import com.linkedin.datahub.upgrade.config.SystemUpdateCondition; +import com.linkedin.datahub.upgrade.conditions.SystemUpdateCondition; import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; import com.linkedin.datahub.upgrade.system.restoreindices.structuredproperties.PropertyDefinitions; import com.linkedin.metadata.entity.AspectDao; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/ReindexDashboardInfoConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/ReindexDashboardInfoConfig.java index 54beed11f9cb..a7157f9e0b14 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/ReindexDashboardInfoConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/ReindexDashboardInfoConfig.java @@ -1,6 +1,6 @@ package com.linkedin.datahub.upgrade.config.restoreindices; -import com.linkedin.datahub.upgrade.config.SystemUpdateCondition; +import com.linkedin.datahub.upgrade.conditions.SystemUpdateCondition; import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; import com.linkedin.datahub.upgrade.system.restoreindices.dashboardinfo.ReindexDashboardInfo; import com.linkedin.metadata.entity.AspectDao; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/ReindexDomainDescriptionConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/ReindexDomainDescriptionConfig.java index 0ec9eb38e2b1..1ad31be9eaf2 100644 --- 
a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/ReindexDomainDescriptionConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/ReindexDomainDescriptionConfig.java @@ -1,6 +1,6 @@ package com.linkedin.datahub.upgrade.config.restoreindices; -import com.linkedin.datahub.upgrade.config.SystemUpdateCondition; +import com.linkedin.datahub.upgrade.conditions.SystemUpdateCondition; import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; import com.linkedin.datahub.upgrade.system.restoreindices.domaindescription.ReindexDomainDescription; import com.linkedin.metadata.entity.AspectDao; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/graph/ReindexDataJobViaNodesCLLConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/graph/ReindexDataJobViaNodesCLLConfig.java index 730991cd0a69..9182561351e9 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/graph/ReindexDataJobViaNodesCLLConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/graph/ReindexDataJobViaNodesCLLConfig.java @@ -1,6 +1,6 @@ package com.linkedin.datahub.upgrade.config.restoreindices.graph; -import com.linkedin.datahub.upgrade.config.SystemUpdateCondition; +import com.linkedin.datahub.upgrade.conditions.SystemUpdateCondition; import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; import com.linkedin.datahub.upgrade.system.restoreindices.graph.vianodes.ReindexDataJobViaNodesCLL; import com.linkedin.metadata.entity.AspectDao; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/graph/ReindexEdgeStatusConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/graph/ReindexEdgeStatusConfig.java index 14b60f44c09a..bb8ff02d971e 100644 --- 
a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/graph/ReindexEdgeStatusConfig.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/restoreindices/graph/ReindexEdgeStatusConfig.java
@@ -1,6 +1,6 @@
 package com.linkedin.datahub.upgrade.config.restoreindices.graph;
 
-import com.linkedin.datahub.upgrade.config.SystemUpdateCondition;
+import com.linkedin.datahub.upgrade.conditions.SystemUpdateCondition;
 import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade;
 import com.linkedin.datahub.upgrade.system.restoreindices.graph.edgestatus.ReindexEdgeStatus;
 import com.linkedin.metadata.entity.AspectDao;
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndices.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndices.java
new file mode 100644
index 000000000000..a3daeb9aea97
--- /dev/null
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndices.java
@@ -0,0 +1,118 @@
+package com.linkedin.datahub.upgrade.loadindices;
+
+import com.google.common.collect.ImmutableList;
+import com.linkedin.common.urn.Urn;
+import com.linkedin.datahub.upgrade.Upgrade;
+import com.linkedin.datahub.upgrade.UpgradeCleanupStep;
+import com.linkedin.datahub.upgrade.UpgradeStep;
+import com.linkedin.datahub.upgrade.shared.ElasticSearchUpgradeUtils;
+import com.linkedin.datahub.upgrade.system.elasticsearch.steps.BuildIndicesStep;
+import com.linkedin.metadata.entity.AspectDao;
+import com.linkedin.metadata.entity.EntityService;
+import com.linkedin.metadata.graph.GraphService;
+import com.linkedin.metadata.search.EntitySearchService;
+import com.linkedin.metadata.service.UpdateIndicesService;
+import com.linkedin.metadata.shared.ElasticSearchIndexed;
+import com.linkedin.metadata.systemmetadata.SystemMetadataService;
+import com.linkedin.metadata.timeseries.TimeseriesAspectService;
+import com.linkedin.structured.StructuredPropertyDefinition;
+import com.linkedin.util.Pair;
+import io.ebean.Database;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import javax.annotation.Nullable;
+
+/**
+ * Upgrade that optionally runs a {@link BuildIndicesStep} and then a {@link LoadIndicesStep}.
+ * Exposes no steps at all when the database or the index manager is not available.
+ */
+public class LoadIndices implements Upgrade {
+  public static final String BATCH_SIZE_ARG_NAME = "batchSize";
+  public static final String LIMIT_ARG_NAME = "limit";
+  public static final String URN_LIKE_ARG_NAME = "urnLike";
+  public static final String GE_PIT_EPOCH_MS_ARG_NAME = "gePitEpochMs";
+  public static final String LE_PIT_EPOCH_MS_ARG_NAME = "lePitEpochMs";
+  public static final String ASPECT_NAMES_ARG_NAME = "aspectNames";
+  public static final String LAST_URN_ARG_NAME = "lastUrn";
+
+  private final List<UpgradeStep> _steps;
+
+  public LoadIndices(
+      @Nullable final Database server,
+      final EntityService entityService,
+      final UpdateIndicesService updateIndicesService,
+      @Nullable final LoadIndicesIndexManager indexManager,
+      @Nullable final SystemMetadataService systemMetadataService,
+      @Nullable final TimeseriesAspectService timeseriesAspectService,
+      @Nullable final EntitySearchService entitySearchService,
+      @Nullable final GraphService graphService,
+      @Nullable final AspectDao aspectDao) {
+    if (server != null && indexManager != null) {
+      _steps =
+          buildSteps(
+              server,
+              entityService,
+              updateIndicesService,
+              indexManager,
+              systemMetadataService,
+              timeseriesAspectService,
+              entitySearchService,
+              graphService,
+              aspectDao);
+    } else {
+      _steps = List.of();
+    }
+  }
+
+  @Override
+  public String id() {
+    return "LoadIndices";
+  }
+
+  @Override
+  public List<UpgradeStep> steps() {
+    return _steps;
+  }
+
+  /**
+   * Assembles the step list: a {@link BuildIndicesStep} first, but only when every index service
+   * and the aspect DAO are present, followed by the {@link LoadIndicesStep}.
+   */
+  private List<UpgradeStep> buildSteps(
+      final Database server,
+      final EntityService entityService,
+      final UpdateIndicesService updateIndicesService,
+      final LoadIndicesIndexManager indexManager,
+      final SystemMetadataService systemMetadataService,
+      final TimeseriesAspectService timeseriesAspectService,
+      final EntitySearchService entitySearchService,
+      final GraphService graphService,
+      final AspectDao aspectDao) {
+    final List<UpgradeStep> steps = new ArrayList<>();
+
+    if (systemMetadataService != null
+        && timeseriesAspectService != null
+        && entitySearchService != null
+        && graphService != null
+        && aspectDao != null) {
+
+      final Set<Pair<Urn, StructuredPropertyDefinition>> structuredProperties =
+          ElasticSearchUpgradeUtils.getActiveStructuredPropertiesDefinitions(aspectDao);
+
+      List<ElasticSearchIndexed> indexedServices =
+          ElasticSearchUpgradeUtils.createElasticSearchIndexedServices(
+              graphService, entitySearchService, systemMetadataService, timeseriesAspectService);
+
+      steps.add(new BuildIndicesStep(indexedServices, structuredProperties));
+    }
+
+    steps.add(new LoadIndicesStep(server, entityService, updateIndicesService, indexManager));
+    return steps;
+  }
+
+  @Override
+  public List<UpgradeCleanupStep> cleanupSteps() {
+    return ImmutableList.of();
+  }
+}
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesArgs.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesArgs.java
new file mode 100644
index 000000000000..729da70abea4
--- /dev/null
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesArgs.java
@@ -0,0 +1,33 @@
+package com.linkedin.datahub.upgrade.loadindices;
+
+import java.util.Collection;
+import lombok.Data;
+
+/** Mutable holder for the arguments of the LoadIndices upgrade. */
+@Data
+public class LoadIndicesArgs {
+  public int batchSize;
+  public int limit;
+  public String urnLike;
+  public Long lePitEpochMs;
+  public Long gePitEpochMs;
+  public Collection<String> aspectNames;
+  public String lastUrn;
+
+  /**
+   * Returns a field-by-field copy. The copy is shallow: {@link #aspectNames} refers to the same
+   * collection instance as the original.
+   */
+  @Override
+  public LoadIndicesArgs clone() {
+    LoadIndicesArgs cloned = new LoadIndicesArgs();
+    cloned.batchSize = this.batchSize;
+    cloned.limit = this.limit;
+    cloned.urnLike = this.urnLike;
+    cloned.lePitEpochMs = this.lePitEpochMs;
+    cloned.gePitEpochMs = this.gePitEpochMs;
+    cloned.aspectNames = this.aspectNames;
+    cloned.lastUrn = this.lastUrn;
+    return cloned;
+  }
+}
diff --git
a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesIndexManager.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesIndexManager.java
new file mode 100644
index 000000000000..b1e438608f2c
--- /dev/null
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesIndexManager.java
@@ -0,0 +1,252 @@
+package com.linkedin.datahub.upgrade.loadindices;
+
+import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder;
+import com.linkedin.metadata.search.elasticsearch.indexbuilder.ReindexConfig;
+import com.linkedin.metadata.systemmetadata.ElasticSearchSystemMetadataService;
+import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
+import com.linkedin.metadata.utils.elasticsearch.SearchClientShim;
+import com.linkedin.metadata.utils.elasticsearch.responses.GetIndexResponse;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import lombok.Getter;
+import lombok.extern.slf4j.Slf4j;
+import org.opensearch.client.RequestOptions;
+import org.opensearch.client.indices.GetIndexRequest;
+
+/**
+ * Manages DataHub Elasticsearch indices during bulk loading operations. Discovers DataHub indices
+ * and manages their refresh intervals and replica counts for optimal bulk loading performance. Uses
+ * the same patterns as existing reindexing code with ReindexConfig and ESIndexBuilder.
+ */
+@Slf4j
+public class LoadIndicesIndexManager {
+
+  private static final String DISABLED_REFRESH_INTERVAL = "-1";
+
+  private final SearchClientShim searchClient;
+  private final IndexConvention indexConvention;
+  private final ESIndexBuilder indexBuilder;
+  private List<ReindexConfig> managedIndexConfigs;
+
+  /** -- GETTER -- Returns true if index settings are currently optimized for bulk operations. */
+  @Getter private boolean settingsOptimized = false;
+
+  private boolean indicesDiscovered = false;
+
+  public LoadIndicesIndexManager(
+      SearchClientShim searchClient,
+      IndexConvention indexConvention,
+      ESIndexBuilder indexBuilder) {
+    this.searchClient = searchClient;
+    this.indexConvention = indexConvention;
+    this.indexBuilder = indexBuilder;
+    // Delay index discovery until first use
+    this.managedIndexConfigs = new ArrayList<>();
+  }
+
+  /**
+   * Discovers all DataHub indices that should have settings managed during bulk operations. This
+   * includes entity indices, graph service indices, and system metadata indices since these are all
+   * stored in SQL and will be modified by load indices operations. Timeseries indices are excluded
+   * since they are not stored in SQL.
+   *
+   * @return List of ReindexConfig objects for managed indices
+   * @throws IOException if there's an error communicating with Elasticsearch
+   */
+  public List<ReindexConfig> discoverDataHubIndexConfigs() throws IOException {
+    List<ReindexConfig> configs = new ArrayList<>();
+
+    // Get entity indices using IndexConvention pattern
+    String entityPattern = indexConvention.getAllEntityIndicesPattern();
+    log.debug("Querying entity indices with pattern: {}", entityPattern);
+    GetIndexRequest entityRequest = new GetIndexRequest(entityPattern);
+    GetIndexResponse entityResponse = searchClient.getIndex(entityRequest, RequestOptions.DEFAULT);
+    String[] entityIndices = entityResponse.getIndices();
+
+    for (String indexName : entityIndices) {
+      try {
+        ReindexConfig config = indexBuilder.buildReindexState(indexName, Map.of(), Map.of());
+        configs.add(config);
+        log.debug("Added entity index config: {}", indexName);
+      } catch (IOException e) {
+        log.warn(
+            "Failed to build reindex config for entity index {}: {}",
+            indexName,
+            e.getMessage(),
+            e);
+      }
+    }
+
+    // Get graph service index
+    String graphIndexName = indexConvention.getIndexName(ElasticSearchGraphService.INDEX_NAME);
+    log.debug("Querying graph service index: {}", graphIndexName);
+    GetIndexRequest graphRequest = new GetIndexRequest(graphIndexName);
+    try {
+      GetIndexResponse graphResponse = searchClient.getIndex(graphRequest, RequestOptions.DEFAULT);
+      String[] graphIndices = graphResponse.getIndices();
+      for (String indexName : graphIndices) {
+        try {
+          ReindexConfig config = indexBuilder.buildReindexState(indexName, Map.of(), Map.of());
+          configs.add(config);
+          log.debug("Added graph service index config: {}", indexName);
+        } catch (IOException e) {
+          log.warn(
+              "Failed to build reindex config for graph index {}: {}",
+              indexName,
+              e.getMessage(),
+              e);
+        }
+      }
+    } catch (Exception e) {
+      log.debug(
+          "Graph service index {} does not exist or is not accessible: {}",
+          graphIndexName,
+          e.getMessage());
+    }
+
+    // Get system metadata index
+    String systemMetadataIndexName =
+        indexConvention.getIndexName(ElasticSearchSystemMetadataService.INDEX_NAME);
+    log.debug("Querying system metadata index: {}", systemMetadataIndexName);
+    GetIndexRequest systemMetadataRequest = new GetIndexRequest(systemMetadataIndexName);
+    try {
+      GetIndexResponse systemMetadataResponse =
+          searchClient.getIndex(systemMetadataRequest, RequestOptions.DEFAULT);
+      String[] systemMetadataIndices = systemMetadataResponse.getIndices();
+      for (String indexName : systemMetadataIndices) {
+        try {
+          ReindexConfig config = indexBuilder.buildReindexState(indexName, Map.of(), Map.of());
+          configs.add(config);
+          log.debug("Added system metadata index config: {}", indexName);
+        } catch (IOException e) {
+          log.warn(
+              "Failed to build reindex config for system metadata index {}: {}",
+              indexName,
+              e.getMessage(),
+              e);
+        }
+      }
+    } catch (Exception e) {
+      log.debug(
+          "System metadata index {} does not exist or is not accessible: {}",
+          systemMetadataIndexName,
+          e.getMessage());
+    }
+
+    return configs;
+  }
+
+  /**
+   * Optimizes index settings for bulk operations by disabling refresh and setting replicas to zero.
+   *
+   * <p>If updating any index fails, the manager is still marked as optimized before the exception
+   * is rethrown, so that {@link #restoreFromConfiguration()} can roll back the indices that were
+   * already modified.
+   *
+   * @throws IOException if index discovery or a settings update fails
+   */
+  public void optimizeForBulkOperations() throws IOException {
+    if (settingsOptimized) {
+      log.warn("Index settings are already optimized for bulk operations");
+      return;
+    }
+
+    // Discover indices lazily on first use (after BuildIndicesStep has run)
+    if (!indicesDiscovered) {
+      log.info("Discovering DataHub indices for settings optimization...");
+      this.managedIndexConfigs = discoverDataHubIndexConfigs();
+      this.indicesDiscovered = true;
+    }
+
+    log.info("Optimizing settings for bulk operations on {} indices", managedIndexConfigs.size());
+
+    for (ReindexConfig config : managedIndexConfigs) {
+      try {
+        // Disable refresh interval for bulk operations
+        indexBuilder.setIndexRefreshInterval(config.name(), DISABLED_REFRESH_INTERVAL);
+
+        indexBuilder.tweakReplicas(config, false);
+
+        log.debug("Optimized settings for index: {}", config.name());
+      } catch (IOException e) {
+        log.error("Failed to optimize settings for index: {}", config.name(), e);
+        // Some indices may already have been modified; keep them restorable.
+        settingsOptimized = true;
+        throw e;
+      }
+    }
+
+    settingsOptimized = true;
+    log.info("Successfully optimized settings for bulk operations on all managed indices");
+  }
+
+  /**
+   * Restores index settings to configured values for all managed indices.
+   *
+   * <p>Every managed index is attempted even if an earlier one fails, so one bad index does not
+   * leave the rest with bulk-load settings. The first failure is rethrown after the loop (later
+   * ones are attached as suppressed) and the manager stays marked as optimized so the restore can
+   * be retried.
+   *
+   * @throws IOException if the settings of at least one index could not be restored
+   */
+  public void restoreFromConfiguration() throws IOException {
+    if (!settingsOptimized) {
+      log.warn("Index settings are not currently optimized");
+      return;
+    }
+
+    log.info("Restoring settings to configured values for {} indices", managedIndexConfigs.size());
+
+    IOException firstFailure = null;
+    for (ReindexConfig config : managedIndexConfigs) {
+      try {
+        restoreIndexSettings(config);
+      } catch (IOException e) {
+        log.error("Failed to restore settings for index: {}", config.name(), e);
+        if (firstFailure == null) {
+          firstFailure = e;
+        } else {
+          firstFailure.addSuppressed(e);
+        }
+      }
+    }
+    if (firstFailure != null) {
+      throw firstFailure;
+    }
+
+    settingsOptimized = false;
+    log.info("Successfully restored settings to configured values for all managed indices");
+  }
+
+  /** Applies the target refresh interval and replica count of one index from its ReindexConfig. */
+  private void restoreIndexSettings(ReindexConfig config) throws IOException {
+    // Get target settings from ReindexConfig (includes per-index overrides)
+    Map<?, ?> targetSettings = config.targetSettings();
+    Object indexSection = targetSettings == null ? null : targetSettings.get("index");
+    if (!(indexSection instanceof Map)) {
+      // Report a missing settings section explicitly rather than failing with an NPE below.
+      throw new IOException("No target index settings available for index: " + config.name());
+    }
+    Map<?, ?> indexSettings = (Map<?, ?>) indexSection;
+
+    // Extract refresh interval and replica count from target settings
+    String targetRefreshInterval = (String) indexSettings.get(ESIndexBuilder.REFRESH_INTERVAL);
+    Integer targetReplicaCount = (Integer) indexSettings.get(ESIndexBuilder.NUMBER_OF_REPLICAS);
+
+    // Restore refresh interval to target value (includes per-index overrides)
+    indexBuilder.setIndexRefreshInterval(config.name(), targetRefreshInterval);
+
+    // Restore replica count to target value (includes per-index overrides)
+    indexBuilder.setIndexReplicaCount(config.name(), targetReplicaCount);
+
+    log.debug(
+        "Restored settings for index: {} to refresh: {}, replicas: {}",
+        config.name(),
+        targetRefreshInterval,
+        targetReplicaCount);
+  }
+}
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesResult.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesResult.java
new file mode 100644
index 000000000000..081fc5be2f39
--- /dev/null
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesResult.java
@@ -0,0 +1,19 @@
+package com.linkedin.datahub.upgrade.loadindices;
+
+import lombok.Data;
+
+/** Row counters and timings reported by the LoadIndices step. */
+@Data
+public class LoadIndicesResult {
+  public int rowsProcessed = 0;
+  public int ignored = 0;
+  public long timeSqlQueryMs = 0;
+  public long timeElasticsearchWriteMs = 0;
+
+  @Override
+  public String toString() {
+    return String.format(
+        "LoadIndicesResult{rowsProcessed=%d, ignored=%d, timeSqlQueryMs=%d, timeElasticsearchWriteMs=%d}",
+        rowsProcessed, ignored, timeSqlQueryMs, timeElasticsearchWriteMs);
+  }
+}
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesStep.java
new file mode 100644
index 000000000000..d02671103c7b
--- /dev/null
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesStep.java
@@ -0,0 +1,556 @@
+package com.linkedin.datahub.upgrade.loadindices;
+
+import static com.linkedin.metadata.Constants.ASPECT_LATEST_VERSION;
+
+import com.datahub.util.RecordUtils;
+import com.linkedin.common.AuditStamp;
+import com.linkedin.common.urn.Urn;
+import com.linkedin.common.urn.UrnUtils;
+import com.linkedin.datahub.upgrade.UpgradeContext;
+import com.linkedin.datahub.upgrade.UpgradeStep;
+import com.linkedin.datahub.upgrade.UpgradeStepResult;
+import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult;
+import com.linkedin.events.metadata.ChangeType;
+import com.linkedin.metadata.config.EbeanConfiguration;
+import com.linkedin.metadata.entity.EntityService;
+import com.linkedin.metadata.entity.ebean.EbeanAspectDao;
+import com.linkedin.metadata.entity.ebean.EbeanAspectV2;
+import com.linkedin.metadata.entity.ebean.PartitionedStream;
+import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs;
+import com.linkedin.metadata.service.UpdateIndicesService;
+import com.linkedin.metadata.utils.PegasusUtils;
+import com.linkedin.mxe.MetadataChangeLog;
+import com.linkedin.mxe.SystemMetadata;
+import com.linkedin.upgrade.DataHubUpgradeState;
+import io.datahubproject.metadata.context.OperationContext;
+import
io.ebean.Database; +import io.ebean.ExpressionList; +import io.ebean.annotation.TxIsolation; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.function.Function; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class LoadIndicesStep implements UpgradeStep { + + private final Database server; + private final EntityService entityService; + private final UpdateIndicesService updateIndicesService; + private final LoadIndicesIndexManager indexManager; + + public LoadIndicesStep( + final Database server, + final EntityService entityService, + final UpdateIndicesService updateIndicesService, + final LoadIndicesIndexManager indexManager) { + this.server = server; + this.entityService = entityService; + this.updateIndicesService = updateIndicesService; + this.indexManager = indexManager; + } + + @Override + public String id() { + return "LoadIndicesStep"; + } + + @Override + public int retryCount() { + return 0; + } + + @Override + public Function executable() { + return (context) -> { + LoadIndicesArgs args = getArgs(context); + + try { + context + .report() + .addLine( + "Loading indices directly from local DB ordered by URN/aspect for optimal document batching"); + + indexManager.optimizeForBulkOperations(); + context.report().addLine("Optimized settings for bulk operations on DataHub indices"); + + log.info("Starting loadIndices"); + + long startTime = System.currentTimeMillis(); + + LoadIndicesResult result = + processAllDataDirectly( + context.opContext(), + args, + (msg) -> { + context.report().addLine(msg); + return null; + }); + + long totalTime = System.currentTimeMillis() - startTime; + context.report().addLine(String.format("Processing completed: %s", result)); + context + .report() + .addLine( + String.format( + "Total processing time: %.2f minutes", (float) totalTime / 1000 / 60)); 
+ return new DefaultUpgradeStepResult(id(), DataHubUpgradeState.SUCCEEDED); + + } catch (Exception e) { + log.error("Error during LoadIndices execution", e); + context.report().addLine(String.format("Error during execution: %s", e.getMessage())); + return new DefaultUpgradeStepResult(id(), DataHubUpgradeState.FAILED); + } finally { + if (indexManager.isSettingsOptimized()) { + try { + indexManager.restoreFromConfiguration(); + context + .report() + .addLine("Restored settings to configured values for all DataHub indices"); + } catch (IOException e) { + log.error("Failed to restore settings", e); + context + .report() + .addLine(String.format("Warning: Failed to restore settings: %s", e.getMessage())); + } + } + } + }; + } + + private LoadIndicesArgs getArgs(UpgradeContext context) { + LoadIndicesArgs result = new LoadIndicesArgs(); + result.batchSize = getBatchSize(context.parsedArgs()); + result.limit = getLimit(context.parsedArgs()); + context.report().addLine(String.format("batchSize is %d", result.batchSize)); + if (result.limit == Integer.MAX_VALUE) { + context.report().addLine("limit is not applied (processing all matching records)"); + } else { + context.report().addLine(String.format("limit is %d", result.limit)); + } + + if (containsKey(context.parsedArgs(), LoadIndices.URN_LIKE_ARG_NAME)) { + result.urnLike = context.parsedArgs().get(LoadIndices.URN_LIKE_ARG_NAME).get(); + context.report().addLine(String.format("urnLike is %s", result.urnLike)); + } else { + context.report().addLine("No urnLike arg present"); + } + if (containsKey(context.parsedArgs(), LoadIndices.LE_PIT_EPOCH_MS_ARG_NAME)) { + result.lePitEpochMs = + Long.parseLong(context.parsedArgs().get(LoadIndices.LE_PIT_EPOCH_MS_ARG_NAME).get()); + context.report().addLine(String.format("lePitEpochMs is %s", result.lePitEpochMs)); + } + if (containsKey(context.parsedArgs(), LoadIndices.GE_PIT_EPOCH_MS_ARG_NAME)) { + result.gePitEpochMs = + 
Long.parseLong(context.parsedArgs().get(LoadIndices.GE_PIT_EPOCH_MS_ARG_NAME).get()); + context.report().addLine(String.format("gePitEpochMs is %s", result.gePitEpochMs)); + } + if (containsKey(context.parsedArgs(), LoadIndices.ASPECT_NAMES_ARG_NAME)) { + result.aspectNames = + Arrays.asList( + context.parsedArgs().get(LoadIndices.ASPECT_NAMES_ARG_NAME).get().split(",")); + context.report().addLine(String.format("aspectNames is %s", result.aspectNames)); + } else { + // Set default aspect names based on entity registry when not provided + result.aspectNames = getDefaultAspectNames(context.opContext()); + context + .report() + .addLine( + String.format("aspectNames not provided, using defaults: %s", result.aspectNames)); + } + if (containsKey(context.parsedArgs(), LoadIndices.LAST_URN_ARG_NAME)) { + result.lastUrn = context.parsedArgs().get(LoadIndices.LAST_URN_ARG_NAME).get(); + context.report().addLine(String.format("lastUrn is %s", result.lastUrn)); + } else { + context.report().addLine("No lastUrn arg present - will process from beginning"); + } + return result; + } + + private LoadIndicesResult processAllDataDirectly( + OperationContext opContext, LoadIndicesArgs args, Function reportFunction) { + LoadIndicesResult result = new LoadIndicesResult(); + long totalStartTime = System.currentTimeMillis(); + + try { + // Create EbeanAspectDao for streaming + EbeanAspectDao aspectDao = new EbeanAspectDao(server, EbeanConfiguration.testDefault, null); + aspectDao.setConnectionValidated(true); + + // Process data using streaming approach (no cursor pagination needed!) 
+ int batchSize = args.batchSize; + final int[] totalProcessed = {0}; // Use array to make it effectively final + final int limit = args.limit; + + // Determine totalRecords based on whether limit is specified + final long totalRecords; + if (limit == Integer.MAX_VALUE) { + reportFunction.apply("No limit specified - counting total aspects for ETA calculation..."); + long countStartTime = System.currentTimeMillis(); + + ExpressionList countQuery = + server + .find(EbeanAspectV2.class) + .where() + .eq(EbeanAspectV2.VERSION_COLUMN, ASPECT_LATEST_VERSION); + + // Apply same filters as main query + if (args.urnLike != null) { + countQuery = countQuery.like(EbeanAspectV2.URN_COLUMN, args.urnLike); + } + if (args.aspectNames != null && !args.aspectNames.isEmpty()) { + countQuery = countQuery.in(EbeanAspectV2.ASPECT_COLUMN, args.aspectNames); + } + if (args.lePitEpochMs != null) { + countQuery = countQuery.le(EbeanAspectV2.CREATED_ON_COLUMN, args.lePitEpochMs); + } + if (args.gePitEpochMs != null) { + countQuery = countQuery.ge(EbeanAspectV2.CREATED_ON_COLUMN, args.gePitEpochMs); + } + if (args.lastUrn != null) { + countQuery = countQuery.ge(EbeanAspectV2.URN_COLUMN, args.lastUrn); + } + + totalRecords = countQuery.findCount(); + long countTime = System.currentTimeMillis() - countStartTime; + + reportFunction.apply( + String.format( + "Found %d total aspects to process (count took %.2f seconds)", + totalRecords, countTime / 1000.0)); + } else { + totalRecords = limit; + reportFunction.apply( + String.format("Limit specified (%d) - will use this for progress calculation", limit)); + } + + reportFunction.apply( + "Starting main data query - this may take a moment for SQL to return the first batch..."); + + // Use streaming approach ordered by URN/aspect for optimal ES document batching + RestoreIndicesArgs restoreArgs = convertToRestoreIndicesArgs(args, limit); + try (PartitionedStream stream = + aspectDao.streamAspectBatches(restoreArgs, TxIsolation.READ_UNCOMMITTED)) { + 
+ // Simple forEach approach since SQL handles the limiting + stream + .partition(batchSize) + .forEach( + batch -> { + long batchStartTime = System.currentTimeMillis(); + + List aspects = batch.collect(java.util.stream.Collectors.toList()); + + if (aspects.isEmpty()) { + return; + } + + result.timeSqlQueryMs += System.currentTimeMillis() - batchStartTime; + + // Pre-allocate list to avoid multiple resizing + List mclBatch = new ArrayList<>(aspects.size()); + int conversionErrors = 0; + for (EbeanAspectV2 aspect : aspects) { + try { + MetadataChangeLog mcl = convertToMetadataChangeLog(opContext, aspect); + mclBatch.add(mcl); + } catch (Exception e) { + log.debug("Error converting aspect: {}", aspect.getKey(), e); + conversionErrors++; + result.ignored++; + } + } + + if (!mclBatch.isEmpty()) { + writeBatchWithRetry(opContext, mclBatch, result, reportFunction); + int aspectsProcessed = aspects.size() - conversionErrors; + totalProcessed[0] += aspectsProcessed; + + // Log the last URN of every batch for resume capability + String lastUrn = aspects.get(aspects.size() - 1).getKey().getUrn(); + + if (totalProcessed[0] % batchSize == 0 || conversionErrors > 0) { + long currentTime = System.currentTimeMillis(); + long elapsedTime = currentTime - totalStartTime; + double aspectsPerSecond = (double) totalProcessed[0] / (elapsedTime / 1000.0); + + String progressMessage; + if (conversionErrors > 0) { + progressMessage = + String.format( + "Processed %d aspects (total: %d, %d conversion errors) - %.1f aspects/sec", + aspectsProcessed, + totalProcessed[0], + conversionErrors, + aspectsPerSecond); + } else { + progressMessage = + String.format( + "Processed %d aspects - %.1f aspects/sec", + totalProcessed[0], aspectsPerSecond); + } + + if (totalRecords > 0 && aspectsPerSecond > 0 && totalProcessed[0] > 50000) { + long remainingAspects; + if (limit != Integer.MAX_VALUE) { + remainingAspects = Math.min(limit, totalRecords) - totalProcessed[0]; + } else { + remainingAspects = 
totalRecords - totalProcessed[0]; + } + + if (remainingAspects > 0) { + long estimatedRemainingMs = + (long) (remainingAspects / aspectsPerSecond * 1000); + long estimatedRemainingMinutes = estimatedRemainingMs / 60000; + long estimatedRemainingSeconds = (estimatedRemainingMs % 60000) / 1000; + + int progressPercent = (int) ((totalProcessed[0] * 100L) / totalRecords); + progressMessage += + String.format( + " - Progress: %d%% - ETA: %dm %ds", + progressPercent, + estimatedRemainingMinutes, + estimatedRemainingSeconds); + } + } + + reportFunction.apply(progressMessage); + reportFunction.apply("Last URN processed: " + lastUrn); + } + } + }); + } + + result.timeElasticsearchWriteMs = + System.currentTimeMillis() - totalStartTime - result.timeSqlQueryMs; + + try { + updateIndicesService.flush(); + log.info("Final flush completed - all data written to Elasticsearch"); + } catch (Exception e) { + log.error("Failed to perform final flush: {}", e.getMessage()); + } + + result.rowsProcessed = totalProcessed[0]; + long totalTimeMs = System.currentTimeMillis() - totalStartTime; + double finalThroughput = (double) result.rowsProcessed / (totalTimeMs / 1000.0); + reportFunction.apply( + String.format( + "Processing completed: %d aspects processed, %d ignored - Final throughput: %.1f aspects/sec", + result.rowsProcessed, result.ignored, finalThroughput)); + + } catch (Exception e) { + log.error("Error in processAllDataDirectly", e); + throw new RuntimeException(e); + } + + return result; + } + + /** Writes a batch to UpdateIndices with retry logic that splits the batch in half if it fails. 
*/ + private void writeBatchWithRetry( + OperationContext opContext, + List batch, + LoadIndicesResult result, + Function reportFunction) { + + List currentBatch = new ArrayList<>(batch); + int retryCount = 0; + final int maxRetries = 3; + + while (!currentBatch.isEmpty() && retryCount <= maxRetries) { + try { + updateIndicesService.handleChangeEvents(opContext, currentBatch); + + if (retryCount > 0) { + log.info( + "Successfully wrote batch of {} MCL events after {} retries", + currentBatch.size(), + retryCount); + } + break; + + } catch (Exception e) { + retryCount++; + log.warn( + "Failed to write batch of {} MCL events (attempt {}): {}", + currentBatch.size(), + retryCount, + e.getMessage()); + + if (retryCount > maxRetries) { + log.error( + "Max retries ({}) exceeded for batch of {} MCL events. Giving up.", + maxRetries, + currentBatch.size()); + result.ignored += currentBatch.size(); + reportFunction.apply( + String.format( + "Failed to write batch of %d MCL events after %d retries - ignoring", + currentBatch.size(), maxRetries)); + break; + } + + int splitPoint = currentBatch.size() / 2; + if (splitPoint == 0) { + log.error( + "Cannot split batch further (size: {}), marking as ignored", currentBatch.size()); + result.ignored += currentBatch.size(); + reportFunction.apply( + String.format( + "Failed to write single MCL event after %d retries - ignoring", retryCount)); + break; + } + + List firstHalf = currentBatch.subList(0, splitPoint); + List secondHalf = currentBatch.subList(splitPoint, currentBatch.size()); + + log.info( + "Splitting failed batch of {} MCL events into two batches of {} and {} for retry", + currentBatch.size(), + firstHalf.size(), + secondHalf.size()); + + try { + updateIndicesService.handleChangeEvents(opContext, firstHalf); + log.debug( + "Successfully wrote first half of split batch ({} MCL events)", firstHalf.size()); + } catch (Exception e2) { + log.error("Failed to write first half of split batch: {}", e2.getMessage()); + 
result.ignored += firstHalf.size(); + reportFunction.apply( + String.format( + "Failed to write first half of split batch (%d MCL events) - ignoring", + firstHalf.size())); + } + + currentBatch = secondHalf; + } + } + } + + private MetadataChangeLog convertToMetadataChangeLog( + OperationContext opContext, EbeanAspectV2 ebeanAspect) throws Exception { + Urn urn = UrnUtils.getUrn(ebeanAspect.getKey().getUrn()); + String aspectName = ebeanAspect.getKey().getAspect(); + String entityType = urn.getEntityType(); + + AuditStamp auditStamp = + new AuditStamp() + .setActor( + UrnUtils.getUrn( + opContext.getActorContext().getAuthentication().getActor().toUrnStr())) + .setTime(ebeanAspect.getCreatedOn().getTime()); + + SystemMetadata systemMetadata = null; + if (ebeanAspect.getSystemMetadata() != null) { + systemMetadata = + RecordUtils.toRecordTemplate(SystemMetadata.class, ebeanAspect.getSystemMetadata()); + } + + return PegasusUtils.constructMCL( + null, + entityType, + urn, + ChangeType.RESTATE, + aspectName, + auditStamp, + RecordUtils.toRecordTemplate( + opContext + .getEntityRegistry() + .getEntitySpec(entityType) + .getAspectSpec(aspectName) + .getDataTemplateClass(), + ebeanAspect.getMetadata()), + systemMetadata, + null, + null); + } + + private int getBatchSize(final Map> parsedArgs) { + return getInt(parsedArgs, 10000, LoadIndices.BATCH_SIZE_ARG_NAME); + } + + private int getLimit(final Map> parsedArgs) { + return getInt(parsedArgs, Integer.MAX_VALUE, LoadIndices.LIMIT_ARG_NAME); + } + + public boolean containsKey(Map> parsedArgs, String key) { + return parsedArgs.containsKey(key) + && parsedArgs.get(key) != null + && parsedArgs.get(key).isPresent(); + } + + private int getInt( + final Map> parsedArgs, int defaultVal, String argKey) { + int result = defaultVal; + if (containsKey(parsedArgs, argKey)) { + result = Integer.parseInt(parsedArgs.get(argKey).get()); + } + return result; + } + + /** + * Get default aspect names based on entity registry. 
Includes all aspects with searchable + * annotations and key aspects for entities that have at least one searchable aspect. + */ + private Set getDefaultAspectNames(OperationContext opContext) { + Set aspectNames = new HashSet<>(); + + for (String entityName : opContext.getEntityRegistry().getEntitySpecs().keySet()) { + try { + com.linkedin.metadata.models.EntitySpec entitySpec = + opContext.getEntityRegistry().getEntitySpec(entityName); + + boolean entityHasSearchableAspects = false; + + for (com.linkedin.metadata.models.AspectSpec aspectSpec : entitySpec.getAspectSpecs()) { + if (!aspectSpec.getSearchableFieldSpecs().isEmpty()) { + entityHasSearchableAspects = true; + aspectNames.add(aspectSpec.getName()); + } + } + + if (entityHasSearchableAspects) { + String keyAspectName = entitySpec.getKeyAspectName(); + if (!aspectNames.contains(keyAspectName)) { + aspectNames.add(keyAspectName); + } + } + } catch (Exception e) { + log.warn( + "Error processing entity {} for default aspect names: {}", entityName, e.getMessage()); + } + } + + return aspectNames; + } + + /** Convert LoadIndicesArgs to RestoreIndicesArgs for compatibility with streamAspectBatches. */ + private RestoreIndicesArgs convertToRestoreIndicesArgs(LoadIndicesArgs args, int limit) { + RestoreIndicesArgs restoreArgs = new RestoreIndicesArgs(); + + if (args.aspectNames != null && !args.aspectNames.isEmpty()) { + restoreArgs.aspectNames = new ArrayList<>(args.aspectNames); + } + restoreArgs.urnLike = args.urnLike; + restoreArgs.gePitEpochMs = args.gePitEpochMs != null ? args.gePitEpochMs : 0L; + restoreArgs.lePitEpochMs = + args.lePitEpochMs != null ? 
args.lePitEpochMs : System.currentTimeMillis(); + restoreArgs.limit = limit; + + // Enable URN-based pagination if lastUrn is provided + if (args.lastUrn != null) { + restoreArgs.urnBasedPagination = true; + restoreArgs.lastUrn = args.lastUrn; + } + + return restoreArgs; + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesUpgradeConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesUpgradeConfig.java new file mode 100644 index 000000000000..f7777a0f6f6e --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesUpgradeConfig.java @@ -0,0 +1,35 @@ +package com.linkedin.datahub.upgrade.loadindices; + +import com.linkedin.datahub.upgrade.config.OpenTelemetryConfig; +import org.springframework.boot.actuate.autoconfigure.metrics.MetricsAutoConfiguration; +import org.springframework.context.annotation.ComponentScan; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.FilterType; +import org.springframework.context.annotation.Import; + +/** + * Configuration for LoadIndices upgrade that excludes Kafka components to prevent connection + * attempts during index loading. 
+ */ +@Configuration +@Import({MetricsAutoConfiguration.class, OpenTelemetryConfig.class}) +@ComponentScan( + basePackages = { + "com.linkedin.datahub.upgrade.loadindices.config", + "com.linkedin.gms.factory.config", + "com.linkedin.gms.factory.common", + "com.linkedin.gms.factory.entity", + "com.linkedin.gms.factory.entityclient", + "com.linkedin.gms.factory.plugins", + "com.linkedin.gms.factory.entityregistry", + "com.linkedin.gms.factory.search", + "com.linkedin.gms.factory.timeseries", + "com.linkedin.gms.factory.context", + "com.linkedin.gms.factory.system_telemetry" + }, + excludeFilters = { + @ComponentScan.Filter( + type = FilterType.ASSIGNABLE_TYPE, + classes = {}) + }) +public class LoadIndicesUpgradeConfig {} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/NoOpKafkaEventProducer.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/NoOpKafkaEventProducer.java new file mode 100644 index 000000000000..4ee3945f9716 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/NoOpKafkaEventProducer.java @@ -0,0 +1,201 @@ +package com.linkedin.datahub.upgrade.loadindices; + +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.dao.producer.KafkaEventProducer; +import com.linkedin.metadata.dao.producer.KafkaHealthChecker; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.utils.metrics.MetricUtils; +import com.linkedin.mxe.DataHubUpgradeHistoryEvent; +import com.linkedin.mxe.MetadataChangeLog; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.PlatformEvent; +import com.linkedin.mxe.TopicConventionImpl; +import io.datahubproject.metadata.context.OperationContext; +import io.micrometer.core.instrument.simple.SimpleMeterRegistry; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Future; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import 
lombok.extern.slf4j.Slf4j; +import org.apache.avro.generic.IndexedRecord; +import org.apache.kafka.clients.producer.Producer; + +/** + * No-op implementation of KafkaEventProducer for LoadIndices upgrade operations. This prevents + * connection attempts to Kafka during index loading operations. + */ +@Slf4j +public class NoOpKafkaEventProducer extends KafkaEventProducer { + + public NoOpKafkaEventProducer() { + // Call parent constructor with no-op implementations + super( + createNoOpProducer(), + new TopicConventionImpl(), + new NoOpKafkaHealthChecker(), + MetricUtils.builder().registry(new SimpleMeterRegistry()).build()); + } + + @SuppressWarnings("unchecked") + private static Producer createNoOpProducer() { + // Use a simple mock that doesn't implement the full interface + return new Producer() { + @Override + public Future send( + org.apache.kafka.clients.producer.ProducerRecord record) { + return CompletableFuture.completedFuture(null); + } + + @Override + public Future send( + org.apache.kafka.clients.producer.ProducerRecord record, + org.apache.kafka.clients.producer.Callback callback) { + return CompletableFuture.completedFuture(null); + } + + @Override + public void flush() { + // No-op + } + + @Override + public void close() { + // No-op + } + + @Override + public void close(java.time.Duration timeout) { + // No-op + } + + @Override + public org.apache.kafka.common.Uuid clientInstanceId(java.time.Duration timeout) { + return org.apache.kafka.common.Uuid.randomUuid(); + } + + @Override + public java.util.Map< + org.apache.kafka.common.MetricName, ? 
extends org.apache.kafka.common.Metric> + metrics() { + return java.util.Collections.emptyMap(); + } + + @Override + public java.util.List partitionsFor(String topic) { + return java.util.Collections.emptyList(); + } + + @Override + public void registerMetricForSubscription( + org.apache.kafka.common.metrics.KafkaMetric metric) { + // No-op + } + + @Override + public void unregisterMetricFromSubscription( + org.apache.kafka.common.metrics.KafkaMetric metric) { + // No-op + } + + @Override + public void initTransactions() { + // No-op + } + + @Override + public void beginTransaction() { + // No-op + } + + @Override + public void sendOffsetsToTransaction( + java.util.Map< + org.apache.kafka.common.TopicPartition, + org.apache.kafka.clients.consumer.OffsetAndMetadata> + offsets, + org.apache.kafka.clients.consumer.ConsumerGroupMetadata groupMetadata) { + // No-op + } + + @Override + public void commitTransaction() { + // No-op + } + + @Override + public void abortTransaction() { + // No-op + } + }; + } + + @Override + public Future produceMetadataChangeLog( + @Nonnull final Urn urn, + @Nullable AspectSpec aspectSpec, + @Nonnull final MetadataChangeLog metadataChangeLog) { + log.debug("NoOpKafkaEventProducer: Skipping MCL production for urn: {}", urn); + return CompletableFuture.completedFuture(null); + } + + @Override + public String getMetadataChangeLogTopicName(@Nonnull AspectSpec aspectSpec) { + return "no-op-mcl-topic"; + } + + @Override + public Future produceMetadataChangeProposal( + @Nonnull final Urn urn, @Nonnull MetadataChangeProposal metadataChangeProposal) { + log.debug("NoOpKafkaEventProducer: Skipping MCP production for urn: {}", urn); + return CompletableFuture.completedFuture(null); + } + + @Override + public String getMetadataChangeProposalTopicName() { + return "no-op-mcp-topic"; + } + + @Override + public Future produceFailedMetadataChangeProposalAsync( + @Nonnull OperationContext opContext, + @Nonnull MetadataChangeProposal mcp, + @Nonnull Set 
throwables) { + log.debug( + "NoOpKafkaEventProducer: Skipping failed MCP production for urn: {}", mcp.getEntityUrn()); + return CompletableFuture.completedFuture(null); + } + + @Override + public Future producePlatformEvent( + @Nonnull String name, @Nullable String key, @Nonnull PlatformEvent payload) { + log.debug("NoOpKafkaEventProducer: Skipping platform event production: {}", name); + return CompletableFuture.completedFuture(null); + } + + @Override + public String getPlatformEventTopicName() { + return "no-op-platform-topic"; + } + + @Override + public void produceDataHubUpgradeHistoryEvent(@Nonnull DataHubUpgradeHistoryEvent event) { + log.debug("NoOpKafkaEventProducer: Skipping DataHub upgrade history event production"); + } + + @Override + public void flush() { + log.debug("NoOpKafkaEventProducer: Flush called - no-op"); + } + + /** No-op implementation of KafkaHealthChecker */ + private static class NoOpKafkaHealthChecker extends KafkaHealthChecker { + @Override + public org.apache.kafka.clients.producer.Callback getKafkaCallBack( + MetricUtils metricUtils, String eventType, String entityUrn) { + return (metadata, exception) -> { + // No-op callback + }; + } + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/config/LoadIndicesConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/config/LoadIndicesConfig.java new file mode 100644 index 000000000000..e35f255954e8 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/config/LoadIndicesConfig.java @@ -0,0 +1,88 @@ +package com.linkedin.datahub.upgrade.loadindices.config; + +import com.linkedin.datahub.upgrade.loadindices.LoadIndices; +import com.linkedin.datahub.upgrade.loadindices.LoadIndicesIndexManager; +import com.linkedin.datahub.upgrade.loadindices.NoOpKafkaEventProducer; +import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; +import 
com.linkedin.metadata.dao.producer.KafkaEventProducer; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.graph.GraphService; +import com.linkedin.metadata.search.EntitySearchService; +import com.linkedin.metadata.service.UpdateIndicesService; +import com.linkedin.metadata.systemmetadata.SystemMetadataService; +import com.linkedin.metadata.timeseries.TimeseriesAspectService; +import com.linkedin.metadata.utils.elasticsearch.SearchClientShim; +import io.datahubproject.metadata.context.OperationContext; +import io.ebean.Database; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; + +@Slf4j +@Configuration +@Import(SystemAuthenticationFactory.class) +public class LoadIndicesConfig { + + /** + * Provides a no-op KafkaEventProducer for LoadIndices upgrade operations. This prevents + * connection attempts to Kafka during index loading. 
+ */ + @Bean(name = "kafkaEventProducer") + @ConditionalOnMissingBean(name = "kafkaEventProducer") + @Nonnull + public KafkaEventProducer noOpKafkaEventProducer() { + log.info("Creating NoOpKafkaEventProducer for LoadIndices upgrade operations"); + return new NoOpKafkaEventProducer(); + } + + @Bean(name = "loadIndicesIndexManager") + @ConditionalOnProperty(name = "entityService.impl", havingValue = "ebean", matchIfMissing = true) + @Nonnull + public LoadIndicesIndexManager createIndexManager( + @Qualifier("systemOperationContext") final OperationContext systemOperationContext, + @Qualifier("searchClientShim") SearchClientShim searchClient, + @Qualifier("elasticSearchIndexBuilder") + final com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder indexBuilder) + throws Exception { + return new LoadIndicesIndexManager( + searchClient, systemOperationContext.getSearchContext().getIndexConvention(), indexBuilder); + } + + @Bean(name = "loadIndices") + @ConditionalOnProperty(name = "entityService.impl", havingValue = "ebean", matchIfMissing = true) + @Nonnull + public LoadIndices createInstance( + final Database ebeanServer, + final EntityService entityService, + final UpdateIndicesService updateIndicesService, + @Qualifier("loadIndicesIndexManager") final LoadIndicesIndexManager indexManager, + final SystemMetadataService systemMetadataService, + final TimeseriesAspectService timeseriesAspectService, + final EntitySearchService entitySearchService, + final GraphService graphService, + final AspectDao aspectDao) { + return new LoadIndices( + ebeanServer, + entityService, + updateIndicesService, + indexManager, + systemMetadataService, + timeseriesAspectService, + entitySearchService, + graphService, + aspectDao); + } + + @Bean(name = "loadIndicesCassandra") + @ConditionalOnProperty(name = "entityService.impl", havingValue = "cassandra") + @Nonnull + public LoadIndices createNotImplInstance() { + throw new IllegalStateException("loadIndices is not supported 
for cassandra!"); + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/shared/ElasticSearchUpgradeUtils.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/shared/ElasticSearchUpgradeUtils.java new file mode 100644 index 000000000000..5278a9400702 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/shared/ElasticSearchUpgradeUtils.java @@ -0,0 +1,96 @@ +package com.linkedin.datahub.upgrade.shared; + +import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; + +import com.datahub.util.RecordUtils; +import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.metadata.aspect.EntityAspect; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.graph.GraphService; +import com.linkedin.metadata.search.EntitySearchService; +import com.linkedin.metadata.shared.ElasticSearchIndexed; +import com.linkedin.metadata.systemmetadata.SystemMetadataService; +import com.linkedin.metadata.timeseries.TimeseriesAspectService; +import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Shared utilities for Elasticsearch upgrade operations. Contains common functionality used by both + * BuildIndices and LoadIndices upgrades. + */ +public class ElasticSearchUpgradeUtils { + + /** + * Creates a list of ElasticSearchIndexed services from the provided services. Filters out + * services that don't implement ElasticSearchIndexed. 
+ * + * @param graphService the graph service + * @param entitySearchService the entity search service + * @param systemMetadataService the system metadata service + * @param timeseriesAspectService the timeseries aspect service + * @return list of ElasticSearchIndexed services + */ + public static List createElasticSearchIndexedServices( + GraphService graphService, + EntitySearchService entitySearchService, + SystemMetadataService systemMetadataService, + TimeseriesAspectService timeseriesAspectService) { + + return Stream.of( + graphService, entitySearchService, systemMetadataService, timeseriesAspectService) + .filter(service -> service instanceof ElasticSearchIndexed) + .map(service -> (ElasticSearchIndexed) service) + .collect(Collectors.toList()); + } + + /** + * Gets active structured properties definitions from the database. This method retrieves all + * structured property definitions that are not marked as removed. + * + * @param aspectDao the aspect DAO for database access + * @return set of active structured property definitions + */ + public static Set> + getActiveStructuredPropertiesDefinitions(AspectDao aspectDao) { + + // First, get all removed structured property URNs + Set removedStructuredPropertyUrns; + try (Stream stream = + aspectDao.streamAspects(STRUCTURED_PROPERTY_ENTITY_NAME, STATUS_ASPECT_NAME)) { + removedStructuredPropertyUrns = + stream + .map( + entityAspect -> + Pair.of( + entityAspect.getUrn(), + RecordUtils.toRecordTemplate(Status.class, entityAspect.getMetadata()))) + .filter(status -> status.getSecond().isRemoved()) + .map(Pair::getFirst) + .collect(Collectors.toSet()); + } + + // Then, get all structured property definitions and filter out removed ones + try (Stream stream = + aspectDao.streamAspects( + STRUCTURED_PROPERTY_ENTITY_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)) { + return stream + .map( + entityAspect -> + Pair.of( + UrnUtils.getUrn(entityAspect.getUrn()), + RecordUtils.toRecordTemplate( + 
StructuredPropertyDefinition.class, entityAspect.getMetadata()))) + .filter( + definition -> !removedStructuredPropertyUrns.contains(definition.getKey().toString())) + .collect(Collectors.toSet()); + } + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/BuildIndices.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/BuildIndices.java index 752fc8027547..0539810d7b48 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/BuildIndices.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/BuildIndices.java @@ -1,21 +1,14 @@ package com.linkedin.datahub.upgrade.system.elasticsearch; -import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; - -import com.datahub.util.RecordUtils; -import com.linkedin.common.Status; import com.linkedin.common.urn.Urn; -import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.upgrade.UpgradeStep; +import com.linkedin.datahub.upgrade.shared.ElasticSearchUpgradeUtils; import com.linkedin.datahub.upgrade.system.BlockingSystemUpgrade; import com.linkedin.datahub.upgrade.system.elasticsearch.steps.BuildIndicesPostStep; import com.linkedin.datahub.upgrade.system.elasticsearch.steps.BuildIndicesPreStep; import com.linkedin.datahub.upgrade.system.elasticsearch.steps.BuildIndicesStep; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; -import com.linkedin.metadata.aspect.EntityAspect; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.search.EntitySearchService; @@ -27,8 +20,6 @@ import java.util.ArrayList; import java.util.List; import 
java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.Stream; public class BuildIndices implements BlockingSystemUpgrade { @@ -45,10 +36,8 @@ public BuildIndices( final AspectDao aspectDao) { List indexedServices = - Stream.of(graphService, entitySearchService, systemMetadataService, timeseriesAspectService) - .filter(service -> service instanceof ElasticSearchIndexed) - .map(service -> (ElasticSearchIndexed) service) - .collect(Collectors.toList()); + ElasticSearchUpgradeUtils.createElasticSearchIndexedServices( + graphService, entitySearchService, systemMetadataService, timeseriesAspectService); _steps = buildSteps(indexedServices, baseElasticSearchComponents, configurationProvider, aspectDao); @@ -73,7 +62,8 @@ private List buildSteps( final Set> structuredProperties; if (configurationProvider.getStructuredProperties().isSystemUpdateEnabled()) { - structuredProperties = getActiveStructuredPropertiesDefinitions(aspectDao); + structuredProperties = + ElasticSearchUpgradeUtils.getActiveStructuredPropertiesDefinitions(aspectDao); } else { structuredProperties = Set.of(); } @@ -95,37 +85,4 @@ private List buildSteps( baseElasticSearchComponents, indexedServices, structuredProperties)); return steps; } - - static Set> getActiveStructuredPropertiesDefinitions( - AspectDao aspectDao) { - Set removedStructuredPropertyUrns; - try (Stream stream = - aspectDao.streamAspects(STRUCTURED_PROPERTY_ENTITY_NAME, STATUS_ASPECT_NAME)) { - removedStructuredPropertyUrns = - stream - .map( - entityAspect -> - Pair.of( - entityAspect.getUrn(), - RecordUtils.toRecordTemplate(Status.class, entityAspect.getMetadata()))) - .filter(status -> status.getSecond().isRemoved()) - .map(Pair::getFirst) - .collect(Collectors.toSet()); - } - - try (Stream stream = - aspectDao.streamAspects( - STRUCTURED_PROPERTY_ENTITY_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)) { - return stream - .map( - entityAspect -> - Pair.of( - UrnUtils.getUrn(entityAspect.getUrn()), - 
RecordUtils.toRecordTemplate( - StructuredPropertyDefinition.class, entityAspect.getMetadata()))) - .filter( - definition -> !removedStructuredPropertyUrns.contains(definition.getKey().toString())) - .collect(Collectors.toSet()); - } - } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/CleanIndices.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/CleanIndices.java index 96aea906b021..94bb83ab9b91 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/CleanIndices.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/CleanIndices.java @@ -1,9 +1,8 @@ package com.linkedin.datahub.upgrade.system.elasticsearch; -import static com.linkedin.datahub.upgrade.system.elasticsearch.BuildIndices.getActiveStructuredPropertiesDefinitions; - import com.linkedin.common.urn.Urn; import com.linkedin.datahub.upgrade.UpgradeStep; +import com.linkedin.datahub.upgrade.shared.ElasticSearchUpgradeUtils; import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; import com.linkedin.datahub.upgrade.system.elasticsearch.steps.CleanIndicesStep; import com.linkedin.gms.factory.config.ConfigurationProvider; @@ -18,8 +17,6 @@ import com.linkedin.util.Pair; import java.util.List; import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.Stream; import lombok.extern.slf4j.Slf4j; @Slf4j @@ -38,16 +35,15 @@ public CleanIndices( final Set> structuredProperties; if (configurationProvider.getStructuredProperties().isSystemUpdateEnabled()) { - structuredProperties = getActiveStructuredPropertiesDefinitions(aspectDao); + structuredProperties = + ElasticSearchUpgradeUtils.getActiveStructuredPropertiesDefinitions(aspectDao); } else { structuredProperties = Set.of(); } List indexedServices = - Stream.of(graphService, entitySearchService, systemMetadataService, timeseriesAspectService) - .filter(service -> service 
instanceof ElasticSearchIndexed) - .map(service -> (ElasticSearchIndexed) service) - .collect(Collectors.toList()); + ElasticSearchUpgradeUtils.createElasticSearchIndexedServices( + graphService, entitySearchService, systemMetadataService, timeseriesAspectService); _steps = List.of( diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java index 215ba9bada17..e697e9197925 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java @@ -50,9 +50,11 @@ public int retryCount() { public Function executable() { return (context) -> { try { + // Each service enumerates and cleans up their own indices IndexUtils.getAllReindexConfigs(indexedServices, structuredProperties) .forEach( - reindexConfig -> ESIndexBuilder.cleanIndex(searchClient, esConfig, reindexConfig)); + reindexConfig -> + ESIndexBuilder.cleanOrphanedIndices(searchClient, esConfig, reindexConfig)); } catch (Exception e) { log.error("CleanUpIndicesStep failed.", e); return new DefaultUpgradeStepResult(id(), DataHubUpgradeState.FAILED); diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/ReindexDebugStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/ReindexDebugStep.java index d34dfcca7e88..543b2f2fb7ac 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/ReindexDebugStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/ReindexDebugStep.java @@ -76,11 +76,20 @@ public Function executable() { return (context) -> { ReindexDebugArgs args = createArgs(context); try { + if (service == null) { 
+ log.error("ReindexDebugStep failed: No ElasticSearchService found"); + return new DefaultUpgradeStepResult(id(), DataHubUpgradeState.FAILED); + } setConfig(args.index); + if (config == null) { + log.error("ReindexDebugStep failed: No matching config found for index: {}", args.index); + return new DefaultUpgradeStepResult(id(), DataHubUpgradeState.FAILED); + } try { service.getIndexBuilder().buildIndex(config); } catch (IOException e) { - throw new RuntimeException(e); + log.error("ReindexDebugStep failed: IOException during buildIndex", e); + return new DefaultUpgradeStepResult(id(), DataHubUpgradeState.FAILED); } } catch (Exception e) { log.error("ReindexDebugStep failed.", e); @@ -94,6 +103,7 @@ void setConfig(String targetIndex) throws IOException, IllegalAccessException { // datahubpolicyindex_v2 has some docs upon starting quickdebug... // String targetIndex = "datahubpolicyindex_v2"; List configs = service.buildReindexConfigs(structuredProperties); + config = null; // Reset config to null for (ReindexConfig cfg : configs) { String cfgname = cfg.name(); if (cfgname.startsWith(targetIndex)) { diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/schemafield/MigrateSchemaFieldDocIdsStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/schemafield/MigrateSchemaFieldDocIdsStep.java index 57bcc1f4499d..a00e42526b22 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/schemafield/MigrateSchemaFieldDocIdsStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/schemafield/MigrateSchemaFieldDocIdsStep.java @@ -92,7 +92,8 @@ public MigrateSchemaFieldDocIdsStep( batchSize, 1L, 1, - 3); + 3, + 1); // threadCount log.info("MigrateSchemaFieldDocIdsStep initialized"); } diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java 
b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java index b5fced50b8dd..efc5da3fcca1 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.upgrade; import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; +import com.linkedin.metadata.EbeanTestUtils; import com.linkedin.metadata.EventSchemaData; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; @@ -15,11 +16,15 @@ import io.micrometer.core.instrument.MeterRegistry; import io.micrometer.core.instrument.simple.SimpleMeterRegistry; import jakarta.annotation.Nonnull; +import jakarta.annotation.PostConstruct; +import java.util.UUID; +import org.mockito.Mockito; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.boot.test.context.TestConfiguration; import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Import; +import org.springframework.context.annotation.Primary; @TestConfiguration @Import(value = {SystemAuthenticationFactory.class}) @@ -29,8 +34,6 @@ public class UpgradeCliApplicationTestConfiguration { // to instantiate // see: https://github.com/spring-projects/spring-framework/issues/33934 @MockBean public UpgradeCli upgradeCli; - @MockBean public Database ebeanServer; - @MockBean public SearchService searchService; @MockBean public GraphService graphService; @@ -41,6 +44,14 @@ public class UpgradeCliApplicationTestConfiguration { @MockBean public SearchClientShim searchClientShim; + @PostConstruct + public void configureMocks() { + // Configure SearchClientShim mock to return a valid engine type + 
Mockito.when(searchClientShim.getEngineType()) + .thenReturn(SearchClientShim.SearchEngineType.OPENSEARCH_2); + } + + @Primary @Bean public MeterRegistry meterRegistry() { return new SimpleMeterRegistry(); @@ -58,4 +69,13 @@ public SchemaRegistryService schemaRegistryService( @Qualifier("eventSchemaData") final EventSchemaData eventSchemaData) { return new SchemaRegistryServiceImpl(new TopicConventionImpl(), eventSchemaData); } + + @Bean + @Primary + public Database ebeanServer() { + // Create a real H2 in-memory database for testing with a unique name to avoid conflicts + String instanceId = "upgradecli_" + UUID.randomUUID().toString().replace("-", ""); + String serverName = "upgradecli_test_" + UUID.randomUUID().toString().replace("-", ""); + return EbeanTestUtils.createNamedTestServer(instanceId, serverName); + } } diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/config/LoadIndicesConditionTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/config/LoadIndicesConditionTest.java new file mode 100644 index 000000000000..e3b76ab98ba7 --- /dev/null +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/config/LoadIndicesConditionTest.java @@ -0,0 +1,265 @@ +package com.linkedin.datahub.upgrade.config; + +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + +import com.linkedin.datahub.upgrade.conditions.LoadIndicesCondition; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.springframework.beans.factory.config.ConfigurableListableBeanFactory; +import org.springframework.boot.ApplicationArguments; +import org.springframework.context.annotation.ConditionContext; +import org.springframework.core.type.AnnotatedTypeMetadata; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class LoadIndicesConditionTest { + + @Mock private ConditionContext 
conditionContext; + + @Mock private AnnotatedTypeMetadata annotatedTypeMetadata; + + @Mock private ConfigurableListableBeanFactory beanFactory; + + @Mock private ApplicationArguments applicationArguments; + + private LoadIndicesCondition loadIndicesCondition; + + @BeforeMethod + public void setUp() { + MockitoAnnotations.openMocks(this); + loadIndicesCondition = new LoadIndicesCondition(); + + // Setup the basic mock chain + when(conditionContext.getBeanFactory()).thenReturn(beanFactory); + when(beanFactory.getBean(ApplicationArguments.class)).thenReturn(applicationArguments); + } + + @Test + public void testConstant() { + // Verify the constant is correctly defined + assertEquals(LoadIndicesCondition.LOAD_INDICES_ARG, "LoadIndices"); + } + + @Test + public void testMatches_WithExactMatch() { + // Arrange + List nonOptionArgs = Arrays.asList("LoadIndices", "otherArg"); + when(applicationArguments.getNonOptionArgs()).thenReturn(nonOptionArgs); + + // Act + boolean result = loadIndicesCondition.matches(conditionContext, annotatedTypeMetadata); + + // Assert + assertTrue(result); + verify(conditionContext).getBeanFactory(); + verify(beanFactory).getBean(ApplicationArguments.class); + verify(applicationArguments).getNonOptionArgs(); + } + + @Test + public void testMatches_WithNoMatch() { + // Arrange + List nonOptionArgs = Arrays.asList("someOtherArg", "anotherArg"); + when(applicationArguments.getNonOptionArgs()).thenReturn(nonOptionArgs); + + // Act + boolean result = loadIndicesCondition.matches(conditionContext, annotatedTypeMetadata); + + // Assert + assertFalse(result); + } + + @Test + public void testMatches_WithEmptyArguments() { + // Arrange + List nonOptionArgs = Collections.emptyList(); + when(applicationArguments.getNonOptionArgs()).thenReturn(nonOptionArgs); + + // Act + boolean result = loadIndicesCondition.matches(conditionContext, annotatedTypeMetadata); + + // Assert + assertFalse(result); + } + + @Test + public void testMatches_WithNullArguments() { + 
// Arrange + when(applicationArguments.getNonOptionArgs()).thenReturn(null); + + // Act + boolean result = loadIndicesCondition.matches(conditionContext, annotatedTypeMetadata); + + // Assert + assertFalse(result); + } + + @Test + public void testMatches_WithNullElementsInArguments() { + // Arrange + List nonOptionArgs = Arrays.asList("LoadIndices", null, "otherArg"); + when(applicationArguments.getNonOptionArgs()).thenReturn(nonOptionArgs); + + // Act + boolean result = loadIndicesCondition.matches(conditionContext, annotatedTypeMetadata); + + // Assert + assertTrue(result); + } + + @Test + public void testMatches_WithOnlyNullElements() { + // Arrange + List nonOptionArgs = Arrays.asList(null, null); + when(applicationArguments.getNonOptionArgs()).thenReturn(nonOptionArgs); + + // Act + boolean result = loadIndicesCondition.matches(conditionContext, annotatedTypeMetadata); + + // Assert + assertFalse(result); + } + + @Test + public void testMatches_WithLoadIndicesOnly() { + // Arrange + List nonOptionArgs = Arrays.asList("LoadIndices"); + when(applicationArguments.getNonOptionArgs()).thenReturn(nonOptionArgs); + + // Act + boolean result = loadIndicesCondition.matches(conditionContext, annotatedTypeMetadata); + + // Assert + assertTrue(result); + } + + @Test + public void testMatches_WithLoadIndicesAtEnd() { + // Arrange + List nonOptionArgs = Arrays.asList("arg1", "arg2", "LoadIndices"); + when(applicationArguments.getNonOptionArgs()).thenReturn(nonOptionArgs); + + // Act + boolean result = loadIndicesCondition.matches(conditionContext, annotatedTypeMetadata); + + // Assert + assertTrue(result); + } + + @Test + public void testMatches_WithLoadIndicesAtBeginning() { + // Arrange + List nonOptionArgs = Arrays.asList("LoadIndices", "arg1", "arg2"); + when(applicationArguments.getNonOptionArgs()).thenReturn(nonOptionArgs); + + // Act + boolean result = loadIndicesCondition.matches(conditionContext, annotatedTypeMetadata); + + // Assert + assertTrue(result); + } + + 
@Test + public void testMatches_WithLoadIndicesInMiddle() { + // Arrange + List nonOptionArgs = Arrays.asList("arg1", "LoadIndices", "arg2"); + when(applicationArguments.getNonOptionArgs()).thenReturn(nonOptionArgs); + + // Act + boolean result = loadIndicesCondition.matches(conditionContext, annotatedTypeMetadata); + + // Assert + assertTrue(result); + } + + @Test + public void testMatches_WithLargeArgumentList() { + // Arrange + List nonOptionArgs = + Arrays.asList( + "arg1", + "arg2", + "arg3", + "arg4", + "arg5", + "arg6", + "arg7", + "arg8", + "arg9", + "arg10", + "LoadIndices", + "arg11", + "arg12", + "arg13", + "arg14", + "arg15"); + when(applicationArguments.getNonOptionArgs()).thenReturn(nonOptionArgs); + + // Act + boolean result = loadIndicesCondition.matches(conditionContext, annotatedTypeMetadata); + + // Assert + assertTrue(result, "Should find match even in large argument list"); + } + + @Test + public void testMatches_WithSpecialCharactersInArguments() { + // Arrange + List nonOptionArgs = + Arrays.asList( + "arg-with-dashes", + "arg_with_underscores", + "arg.with.dots", + "arg@with@symbols", + "LoadIndices", + "arg with spaces"); + when(applicationArguments.getNonOptionArgs()).thenReturn(nonOptionArgs); + + // Act + boolean result = loadIndicesCondition.matches(conditionContext, annotatedTypeMetadata); + + // Assert + assertTrue(result); + } + + @Test + public void testImplementsConditionInterface() { + // Verify that the class properly implements the Condition interface + assertTrue(loadIndicesCondition instanceof org.springframework.context.annotation.Condition); + } + + @Test + public void testConstantAccessibility() { + // Verify the constant can be accessed statically + String loadIndicesConstant = LoadIndicesCondition.LOAD_INDICES_ARG; + assertNotNull(loadIndicesConstant); + assertFalse(loadIndicesConstant.isEmpty()); + assertEquals(loadIndicesConstant, "LoadIndices"); + } + + @Test + public void testMatches_MetadataParameterNotUsed() { + // 
Arrange + List nonOptionArgs = Arrays.asList("LoadIndices"); + when(applicationArguments.getNonOptionArgs()).thenReturn(nonOptionArgs); + + // Act - Call with null metadata to ensure it's not used + boolean result = loadIndicesCondition.matches(conditionContext, null); + + // Assert + assertTrue(result); + // Verify metadata was never accessed (no interactions) + verifyNoInteractions(annotatedTypeMetadata); + } + + @Test + public void testLoadIndicesArgsSet() { + // Verify the LOAD_INDICES_ARGS set contains the expected value + assertTrue(LoadIndicesCondition.LOAD_INDICES_ARGS.contains("LoadIndices")); + assertEquals(LoadIndicesCondition.LOAD_INDICES_ARGS.size(), 1); + } +} diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/config/ReindexDebugConditionTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/config/ReindexDebugConditionTest.java index 27625a4ae05e..5de9bd6753f2 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/config/ReindexDebugConditionTest.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/config/ReindexDebugConditionTest.java @@ -3,6 +3,7 @@ import static org.mockito.Mockito.*; import static org.testng.Assert.*; +import com.linkedin.datahub.upgrade.conditions.ReindexDebugCondition; import java.util.Arrays; import java.util.Collections; import java.util.List; diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesApplicationTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesApplicationTest.java new file mode 100644 index 000000000000..888437dcb87f --- /dev/null +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesApplicationTest.java @@ -0,0 +1,59 @@ +package com.linkedin.datahub.upgrade.loadindices; + +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; +import static org.testng.AssertJUnit.assertNotNull; + +import 
com.linkedin.datahub.upgrade.UpgradeCliApplication; +import com.linkedin.datahub.upgrade.UpgradeCliApplicationTestConfiguration; +import com.linkedin.datahub.upgrade.restoreindices.RestoreIndices; +import com.linkedin.datahub.upgrade.system.SystemUpdate; +import com.linkedin.datahub.upgrade.system.SystemUpdateBlocking; +import com.linkedin.datahub.upgrade.system.SystemUpdateNonBlocking; +import javax.inject.Named; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testng.annotations.Test; + +@ActiveProfiles("test") +@SpringBootTest( + classes = {UpgradeCliApplication.class, UpgradeCliApplicationTestConfiguration.class}, + args = {"-u", "LoadIndices"}) +public class LoadIndicesApplicationTest extends AbstractTestNGSpringContextTests { + + @Autowired + @Named("loadIndices") + private LoadIndices loadIndices; + + @Autowired(required = false) + @Named("restoreIndices") + private RestoreIndices restoreIndices; + + @Autowired(required = false) + @Named("systemUpdate") + private SystemUpdate systemUpdate; + + @Autowired(required = false) + @Named("systemUpdateBlocking") + private SystemUpdateBlocking systemUpdateBlocking; + + @Autowired(required = false) + @Named("systemUpdateNonBlocking") + private SystemUpdateNonBlocking systemUpdateNonBlocking; + + @Test + public void testInit() { + assertNotNull(loadIndices); + assertNull(restoreIndices, "Expected no additional execution components."); + assertNull(systemUpdate, "Expected no additional execution components."); + assertNull(systemUpdateBlocking, "Expected no additional execution components."); + assertNull(systemUpdateNonBlocking, "Expected no additional execution components."); + } + + @Test + public void testSteps() { + assertTrue(loadIndices.steps().size() >= 2); + } +} diff --git 
a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesArgsTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesArgsTest.java new file mode 100644 index 000000000000..acda024fb0e6 --- /dev/null +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesArgsTest.java @@ -0,0 +1,92 @@ +package com.linkedin.datahub.upgrade.loadindices; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotSame; + +import java.util.List; +import org.testng.annotations.Test; + +public class LoadIndicesArgsTest { + + @Test + public void testDefaultValues() { + LoadIndicesArgs args = new LoadIndicesArgs(); + + assertEquals(0, args.batchSize); + assertEquals(0, args.limit); + assertEquals(null, args.urnLike); + assertEquals(null, args.lePitEpochMs); + assertEquals(null, args.gePitEpochMs); + assertEquals(null, args.aspectNames); + } + + @Test + public void testSettersAndGetters() { + LoadIndicesArgs args = new LoadIndicesArgs(); + + args.batchSize = 1000; + args.limit = 5000; + args.urnLike = "urn:li:dataset:*"; + args.lePitEpochMs = 1640995200000L; + args.gePitEpochMs = 1672531200000L; + args.aspectNames = List.of("datasetProperties", "ownership"); + + assertEquals(1000, args.batchSize); + assertEquals(5000, args.limit); + assertEquals("urn:li:dataset:*", args.urnLike); + assertEquals(Long.valueOf(1640995200000L), args.lePitEpochMs); + assertEquals(Long.valueOf(1672531200000L), args.gePitEpochMs); + assertEquals(List.of("datasetProperties", "ownership"), args.aspectNames); + } + + @Test + public void testClone() { + LoadIndicesArgs original = new LoadIndicesArgs(); + original.batchSize = 1000; + original.limit = 5000; + original.urnLike = "urn:li:dataset:*"; + original.lePitEpochMs = 1640995200000L; + original.gePitEpochMs = 1672531200000L; + original.aspectNames = List.of("datasetProperties", "ownership"); + + LoadIndicesArgs cloned = original.clone(); + 
+ // Verify it's a different object + assertNotSame(original, cloned); + + // Verify all values are copied + assertEquals(original.batchSize, cloned.batchSize); + assertEquals(original.limit, cloned.limit); + assertEquals(original.urnLike, cloned.urnLike); + assertEquals(original.lePitEpochMs, cloned.lePitEpochMs); + assertEquals(original.gePitEpochMs, cloned.gePitEpochMs); + assertEquals(original.aspectNames, cloned.aspectNames); + + // Verify modifying clone doesn't affect original + cloned.batchSize = 2000; + cloned.urnLike = "urn:li:table:*"; + + assertEquals(1000, original.batchSize); + assertEquals("urn:li:dataset:*", original.urnLike); + assertEquals(2000, cloned.batchSize); + assertEquals("urn:li:table:*", cloned.urnLike); + } + + @Test + public void testCloneWithNullValues() { + LoadIndicesArgs original = new LoadIndicesArgs(); + original.batchSize = 1000; + original.limit = 5000; + // Leave other fields as null + + LoadIndicesArgs cloned = original.clone(); + + assertNotSame(original, cloned); + assertEquals(1000, cloned.batchSize); + assertEquals(5000, cloned.limit); + assertEquals(null, cloned.urnLike); + assertEquals(null, cloned.lePitEpochMs); + assertEquals(null, cloned.gePitEpochMs); + assertEquals(null, cloned.aspectNames); + } +} diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesIndexManagerTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesIndexManagerTest.java new file mode 100644 index 000000000000..379c408f9286 --- /dev/null +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesIndexManagerTest.java @@ -0,0 +1,382 @@ +package com.linkedin.datahub.upgrade.loadindices; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + +import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService; +import 
com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.search.elasticsearch.indexbuilder.ReindexConfig; +import com.linkedin.metadata.systemmetadata.ElasticSearchSystemMetadataService; +import com.linkedin.metadata.utils.elasticsearch.IndexConvention; +import com.linkedin.metadata.utils.elasticsearch.SearchClientShim; +import com.linkedin.metadata.utils.elasticsearch.responses.GetIndexResponse; +import java.io.IOException; +import java.util.Map; +import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.indices.GetIndexRequest; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class LoadIndicesIndexManagerTest { + + private LoadIndicesIndexManager indexManager; + private SearchClientShim mockSearchClient; + private IndexConvention mockIndexConvention; + private ESIndexBuilder mockIndexBuilder; + + @BeforeMethod + public void setUp() { + mockSearchClient = mock(SearchClientShim.class); + mockIndexConvention = mock(IndexConvention.class); + mockIndexBuilder = mock(ESIndexBuilder.class); + + // Create a fresh instance for each test to avoid state accumulation + indexManager = + new LoadIndicesIndexManager(mockSearchClient, mockIndexConvention, mockIndexBuilder); + } + + @Test + public void testConstructor() { + assertNotNull(indexManager); + assertFalse(indexManager.isSettingsOptimized()); + } + + @Test + public void testDiscoverDataHubIndexConfigs() throws IOException { + // Mock entity indices response + GetIndexResponse mockEntityResponse = mock(GetIndexResponse.class); + String[] entityIndices = { + "datahub_datasetindex_v2", "datahub_dashboardindex_v2", "datahub_chartindex_v2" + }; + when(mockEntityResponse.getIndices()).thenReturn(entityIndices); + + // Mock graph service index response + GetIndexResponse mockGraphResponse = mock(GetIndexResponse.class); + String[] graphIndices = 
{"datahub_graph_service_v1"}; + when(mockGraphResponse.getIndices()).thenReturn(graphIndices); + + // Mock system metadata index response + GetIndexResponse mockSystemMetadataResponse = mock(GetIndexResponse.class); + String[] systemMetadataIndices = {"datahub_system_metadata_service_v1"}; + when(mockSystemMetadataResponse.getIndices()).thenReturn(systemMetadataIndices); + + // Mock the search client to return different responses for different indices + when(mockSearchClient.getIndex(any(GetIndexRequest.class), any(RequestOptions.class))) + .thenReturn(mockEntityResponse) + .thenReturn(mockGraphResponse) + .thenReturn(mockSystemMetadataResponse); + + // Mock index convention patterns + when(mockIndexConvention.getAllEntityIndicesPattern()).thenReturn("datahub_*index_v2"); + when(mockIndexConvention.getIndexName(ElasticSearchGraphService.INDEX_NAME)) + .thenReturn("datahub_graph_service_v1"); + when(mockIndexConvention.getIndexName(ElasticSearchSystemMetadataService.INDEX_NAME)) + .thenReturn("datahub_system_metadata_service_v1"); + + // Mock ESIndexBuilder to return ReindexConfig objects + ReindexConfig mockConfig1 = mock(ReindexConfig.class); + ReindexConfig mockConfig2 = mock(ReindexConfig.class); + ReindexConfig mockConfig3 = mock(ReindexConfig.class); + ReindexConfig mockConfig4 = mock(ReindexConfig.class); + ReindexConfig mockConfig5 = mock(ReindexConfig.class); + + when(mockConfig1.name()).thenReturn("datahub_datasetindex_v2"); + when(mockConfig2.name()).thenReturn("datahub_dashboardindex_v2"); + when(mockConfig3.name()).thenReturn("datahub_chartindex_v2"); + when(mockConfig4.name()).thenReturn("datahub_graph_service_v1"); + when(mockConfig5.name()).thenReturn("datahub_system_metadata_service_v1"); + + // Mock target settings for each config + Map targetSettings1 = + Map.of( + "index", + Map.of(ESIndexBuilder.REFRESH_INTERVAL, "3s", ESIndexBuilder.NUMBER_OF_REPLICAS, 1)); + Map targetSettings2 = + Map.of( + "index", + Map.of(ESIndexBuilder.REFRESH_INTERVAL, 
"3s", ESIndexBuilder.NUMBER_OF_REPLICAS, 1)); + Map targetSettings3 = + Map.of( + "index", + Map.of(ESIndexBuilder.REFRESH_INTERVAL, "3s", ESIndexBuilder.NUMBER_OF_REPLICAS, 1)); + Map targetSettings4 = + Map.of( + "index", + Map.of(ESIndexBuilder.REFRESH_INTERVAL, "3s", ESIndexBuilder.NUMBER_OF_REPLICAS, 1)); + Map targetSettings5 = + Map.of( + "index", + Map.of(ESIndexBuilder.REFRESH_INTERVAL, "3s", ESIndexBuilder.NUMBER_OF_REPLICAS, 1)); + + when(mockConfig1.targetSettings()).thenReturn(targetSettings1); + when(mockConfig2.targetSettings()).thenReturn(targetSettings2); + when(mockConfig3.targetSettings()).thenReturn(targetSettings3); + when(mockConfig4.targetSettings()).thenReturn(targetSettings4); + when(mockConfig5.targetSettings()).thenReturn(targetSettings5); + + when(mockIndexBuilder.buildReindexState(any(String.class), any(Map.class), any(Map.class))) + .thenReturn(mockConfig1) + .thenReturn(mockConfig2) + .thenReturn(mockConfig3) + .thenReturn(mockConfig4) + .thenReturn(mockConfig5); + + var result = indexManager.discoverDataHubIndexConfigs(); + + assertNotNull(result); + assertEquals(result.size(), 5); + assertTrue(result.stream().anyMatch(c -> c.name().equals("datahub_datasetindex_v2"))); + assertTrue(result.stream().anyMatch(c -> c.name().equals("datahub_dashboardindex_v2"))); + assertTrue(result.stream().anyMatch(c -> c.name().equals("datahub_chartindex_v2"))); + assertTrue(result.stream().anyMatch(c -> c.name().equals("datahub_graph_service_v1"))); + assertTrue( + result.stream().anyMatch(c -> c.name().equals("datahub_system_metadata_service_v1"))); + } + + @Test + public void testDiscoverDataHubIndexConfigsWithIOException() throws IOException { + // Mock IOException + when(mockSearchClient.getIndex(any(GetIndexRequest.class), any(RequestOptions.class))) + .thenThrow(new IOException("Connection failed")); + + assertThrows(IOException.class, () -> indexManager.discoverDataHubIndexConfigs()); + } + + @Test + public void testOptimizeForBulkOperations() 
throws IOException { + // Mock discoverDataHubIndexConfigs behavior + GetIndexResponse mockResponse = mock(GetIndexResponse.class); + String[] allIndices = {"datahub_dataset_v2"}; + when(mockResponse.getIndices()).thenReturn(allIndices); + when(mockSearchClient.getIndex(any(GetIndexRequest.class), any(RequestOptions.class))) + .thenReturn(mockResponse); + + // Mock ESIndexBuilder to return ReindexConfig objects + ReindexConfig mockConfig = mock(ReindexConfig.class); + when(mockConfig.name()).thenReturn("datahub_dataset_v2"); + Map targetSettings = + Map.of( + "index", + Map.of(ESIndexBuilder.REFRESH_INTERVAL, "3s", ESIndexBuilder.NUMBER_OF_REPLICAS, 1)); + when(mockConfig.targetSettings()).thenReturn(targetSettings); + when(mockIndexBuilder.buildReindexState(any(String.class), any(Map.class), any(Map.class))) + .thenReturn(mockConfig); + + // Mock update settings + when(mockSearchClient.updateIndexSettings( + any(UpdateSettingsRequest.class), any(RequestOptions.class))) + .thenReturn(null); + + indexManager.optimizeForBulkOperations(); + + assertTrue(indexManager.isSettingsOptimized()); + } + + @Test + public void testOptimizeForBulkOperationsWithIOException() throws IOException { + // Mock IOException during discovery + when(mockSearchClient.getIndex(any(GetIndexRequest.class), any(RequestOptions.class))) + .thenThrow(new IOException("Discovery failed")); + + assertThrows(IOException.class, () -> indexManager.optimizeForBulkOperations()); + } + + @Test + public void testRestoreFromConfiguration() throws IOException { + // First optimize settings to set up state + GetIndexResponse mockResponse = mock(GetIndexResponse.class); + String[] allIndices = {"datahub_dataset_v2"}; + when(mockResponse.getIndices()).thenReturn(allIndices); + when(mockSearchClient.getIndex(any(GetIndexRequest.class), any(RequestOptions.class))) + .thenReturn(mockResponse); + + // Mock ESIndexBuilder to return ReindexConfig objects + ReindexConfig mockConfig = mock(ReindexConfig.class); + 
when(mockConfig.name()).thenReturn("datahub_dataset_v2"); + Map targetSettings = + Map.of( + "index", + Map.of(ESIndexBuilder.REFRESH_INTERVAL, "3s", ESIndexBuilder.NUMBER_OF_REPLICAS, 1)); + when(mockConfig.targetSettings()).thenReturn(targetSettings); + when(mockIndexBuilder.buildReindexState(any(String.class), any(Map.class), any(Map.class))) + .thenReturn(mockConfig); + + // Mock update settings + when(mockSearchClient.updateIndexSettings( + any(UpdateSettingsRequest.class), any(RequestOptions.class))) + .thenReturn(null); + + // Optimize settings first + indexManager.optimizeForBulkOperations(); + assertTrue(indexManager.isSettingsOptimized()); + + // Now test restore + indexManager.restoreFromConfiguration(); + + assertFalse(indexManager.isSettingsOptimized()); + } + + @Test + public void testRestoreFromConfigurationWithIOException() throws IOException { + // Set up state first + GetIndexResponse mockResponse = mock(GetIndexResponse.class); + String[] allIndices = {"datahub_dataset_v2"}; + when(mockResponse.getIndices()).thenReturn(allIndices); + when(mockSearchClient.getIndex(any(GetIndexRequest.class), any(RequestOptions.class))) + .thenReturn(mockResponse); + + // Mock ESIndexBuilder to return ReindexConfig objects + ReindexConfig mockConfig = mock(ReindexConfig.class); + when(mockConfig.name()).thenReturn("datahub_dataset_v2"); + Map targetSettings = + Map.of( + "index", + Map.of(ESIndexBuilder.REFRESH_INTERVAL, "3s", ESIndexBuilder.NUMBER_OF_REPLICAS, 1)); + when(mockConfig.targetSettings()).thenReturn(targetSettings); + when(mockIndexBuilder.buildReindexState(any(String.class), any(Map.class), any(Map.class))) + .thenReturn(mockConfig); + + // Mock ESIndexBuilder methods to throw IOException during restore + doNothing() + .when(mockIndexBuilder) + .setIndexRefreshInterval(any(String.class), any(String.class)); + doNothing().when(mockIndexBuilder).tweakReplicas(any(ReindexConfig.class), any(Boolean.class)); + doThrow(new IOException("Update failed")) + 
.when(mockIndexBuilder) + .setIndexReplicaCount(any(String.class), any(Integer.class)); + + // Optimize settings first + indexManager.optimizeForBulkOperations(); + assertTrue(indexManager.isSettingsOptimized()); + + // Now test restore + assertThrows(IOException.class, () -> indexManager.restoreFromConfiguration()); + } + + @Test + public void testOptimizeForBulkOperationsManagesBothRefreshAndReplicas() throws IOException { + // Mock responses for index discovery + GetIndexResponse mockEntityResponse = mock(GetIndexResponse.class); + String[] entityIndices = {"datahub_datasetindex_v2"}; + when(mockEntityResponse.getIndices()).thenReturn(entityIndices); + + GetIndexResponse mockGraphResponse = mock(GetIndexResponse.class); + GetIndexResponse mockSystemMetadataResponse = mock(GetIndexResponse.class); + when(mockGraphResponse.getIndices()).thenReturn(new String[0]); + when(mockSystemMetadataResponse.getIndices()).thenReturn(new String[0]); + + when(mockSearchClient.getIndex(any(GetIndexRequest.class), any(RequestOptions.class))) + .thenReturn(mockEntityResponse) + .thenReturn(mockGraphResponse) + .thenReturn(mockSystemMetadataResponse); + + // Mock index convention patterns + when(mockIndexConvention.getAllEntityIndicesPattern()).thenReturn("datahub_*index_v2"); + when(mockIndexConvention.getIndexName(ElasticSearchGraphService.INDEX_NAME)) + .thenReturn("datahub_graph_service_v1"); + when(mockIndexConvention.getIndexName(ElasticSearchSystemMetadataService.INDEX_NAME)) + .thenReturn("datahub_system_metadata_service_v1"); + + // Mock ESIndexBuilder to return ReindexConfig objects + ReindexConfig mockConfig = mock(ReindexConfig.class); + when(mockConfig.name()).thenReturn("datahub_datasetindex_v2"); + Map targetSettings = + Map.of( + "index", + Map.of(ESIndexBuilder.REFRESH_INTERVAL, "3s", ESIndexBuilder.NUMBER_OF_REPLICAS, 1)); + when(mockConfig.targetSettings()).thenReturn(targetSettings); + when(mockIndexBuilder.buildReindexState(any(String.class), any(Map.class), 
any(Map.class))) + .thenReturn(mockConfig); + + // Mock ESIndexBuilder methods instead of SearchClientShim + doNothing() + .when(mockIndexBuilder) + .setIndexRefreshInterval(any(String.class), any(String.class)); + doNothing().when(mockIndexBuilder).tweakReplicas(any(ReindexConfig.class), any(Boolean.class)); + + indexManager.optimizeForBulkOperations(); + + assertTrue(indexManager.isSettingsOptimized()); + + // Verify that ESIndexBuilder methods were called + verify(mockIndexBuilder, times(1)).setIndexRefreshInterval("datahub_datasetindex_v2", "-1"); + verify(mockIndexBuilder, times(1)).tweakReplicas(mockConfig, false); + } + + @Test + public void testRestoreFromConfigurationWithPerIndexOverrides() throws IOException { + // Mock entity indices response + GetIndexResponse mockEntityResponse = mock(GetIndexResponse.class); + String[] entityIndices = {"datahub_dataset_v2", "datahub_dashboard_v2"}; + when(mockEntityResponse.getIndices()).thenReturn(entityIndices); + + // Mock graph service index response (empty) + GetIndexResponse mockGraphResponse = mock(GetIndexResponse.class); + when(mockGraphResponse.getIndices()).thenReturn(new String[0]); + + // Mock system metadata index response (empty) + GetIndexResponse mockSystemMetadataResponse = mock(GetIndexResponse.class); + when(mockSystemMetadataResponse.getIndices()).thenReturn(new String[0]); + + // Mock the search client to return different responses for different patterns + when(mockSearchClient.getIndex(any(GetIndexRequest.class), any(RequestOptions.class))) + .thenReturn(mockEntityResponse) + .thenReturn(mockGraphResponse) + .thenReturn(mockSystemMetadataResponse); + + // Mock index convention patterns + when(mockIndexConvention.getAllEntityIndicesPattern()).thenReturn("datahub_*index_v2"); + when(mockIndexConvention.getIndexName(ElasticSearchGraphService.INDEX_NAME)) + .thenReturn("datahub_graph_service_v1"); + when(mockIndexConvention.getIndexName(ElasticSearchSystemMetadataService.INDEX_NAME)) + 
.thenReturn("datahub_system_metadata_service_v1"); + + // Mock ESIndexBuilder to return ReindexConfig with different per-index settings + ReindexConfig mockConfig1 = mock(ReindexConfig.class); + ReindexConfig mockConfig2 = mock(ReindexConfig.class); + + when(mockConfig1.name()).thenReturn("datahub_dataset_v2"); + when(mockConfig2.name()).thenReturn("datahub_dashboard_v2"); + + // Mock different target settings for each index (simulating per-index overrides) + Map targetSettings1 = + Map.of( + "index", + Map.of(ESIndexBuilder.REFRESH_INTERVAL, "5s", ESIndexBuilder.NUMBER_OF_REPLICAS, 2)); + Map targetSettings2 = + Map.of( + "index", + Map.of(ESIndexBuilder.REFRESH_INTERVAL, "10s", ESIndexBuilder.NUMBER_OF_REPLICAS, 0)); + + when(mockConfig1.targetSettings()).thenReturn(targetSettings1); + when(mockConfig2.targetSettings()).thenReturn(targetSettings2); + + when(mockIndexBuilder.buildReindexState(any(String.class), any(Map.class), any(Map.class))) + .thenReturn(mockConfig1) + .thenReturn(mockConfig2); + + // Mock ESIndexBuilder methods + doNothing() + .when(mockIndexBuilder) + .setIndexRefreshInterval(any(String.class), any(String.class)); + doNothing().when(mockIndexBuilder).setIndexReplicaCount(any(String.class), any(Integer.class)); + doNothing().when(mockIndexBuilder).tweakReplicas(any(ReindexConfig.class), any(Boolean.class)); + + // First optimize settings + indexManager.optimizeForBulkOperations(); + assertTrue(indexManager.isSettingsOptimized()); + + // Now test restore with per-index overrides + indexManager.restoreFromConfiguration(); + + assertFalse(indexManager.isSettingsOptimized()); + + // Verify that ESIndexBuilder methods were called for both indices with their specific settings + verify(mockIndexBuilder, times(1)).setIndexRefreshInterval("datahub_dataset_v2", "5s"); + verify(mockIndexBuilder, times(1)).setIndexReplicaCount("datahub_dataset_v2", 2); + verify(mockIndexBuilder, times(1)).setIndexRefreshInterval("datahub_dashboard_v2", "10s"); + 
verify(mockIndexBuilder, times(1)).setIndexReplicaCount("datahub_dashboard_v2", 0); + } +} diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesResultTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesResultTest.java new file mode 100644 index 000000000000..097ba50e7fe7 --- /dev/null +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesResultTest.java @@ -0,0 +1,66 @@ +package com.linkedin.datahub.upgrade.loadindices; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import org.testng.annotations.Test; + +/** Tests field defaults, field assignment and toString() of {@link LoadIndicesResult}. */ +public class LoadIndicesResultTest { + + @Test + public void testDefaultValues() { + LoadIndicesResult result = new LoadIndicesResult(); + + // TestNG's assertEquals takes (actual, expected), in that order. + assertEquals(result.rowsProcessed, 0); + assertEquals(result.ignored, 0); + assertEquals(result.timeSqlQueryMs, 0); + assertEquals(result.timeElasticsearchWriteMs, 0); + } + + @Test + public void testSettersAndGetters() { + LoadIndicesResult result = new LoadIndicesResult(); + + result.rowsProcessed = 1000; + result.ignored = 5; + result.timeSqlQueryMs = 1500L; + result.timeElasticsearchWriteMs = 8000L; + + assertEquals(result.rowsProcessed, 1000); + assertEquals(result.ignored, 5); + assertEquals(result.timeSqlQueryMs, 1500L); + assertEquals(result.timeElasticsearchWriteMs, 8000L); + } + + @Test + public void testToString() { + LoadIndicesResult result = new LoadIndicesResult(); + result.rowsProcessed = 1000; + result.ignored = 5; + result.timeSqlQueryMs = 1500L; + result.timeElasticsearchWriteMs = 8000L; + + String resultString = result.toString(); + + assertTrue(resultString.contains("LoadIndicesResult")); + assertTrue(resultString.contains("rowsProcessed=1000")); + assertTrue(resultString.contains("ignored=5")); + assertTrue(resultString.contains("timeSqlQueryMs=1500")); + assertTrue(resultString.contains("timeElasticsearchWriteMs=8000")); + } + + @Test +
public void testToStringWithZeroValues() { + LoadIndicesResult result = new LoadIndicesResult(); + + String resultString = result.toString(); + + assertTrue(resultString.contains("LoadIndicesResult")); + assertTrue(resultString.contains("rowsProcessed=0")); + assertTrue(resultString.contains("ignored=0")); + assertTrue(resultString.contains("timeSqlQueryMs=0")); + assertTrue(resultString.contains("timeElasticsearchWriteMs=0")); + } +} diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesStepTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesStepTest.java new file mode 100644 index 000000000000..d000486da579 --- /dev/null +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesStepTest.java @@ -0,0 +1,1579 @@ +package com.linkedin.datahub.upgrade.loadindices; + +import static org.mockito.Mockito.*; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import com.linkedin.datahub.upgrade.UpgradeContext; +import com.linkedin.datahub.upgrade.UpgradeReport; +import com.linkedin.datahub.upgrade.UpgradeStepResult; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.EbeanTestUtils; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.ebean.EbeanAspectV2; +import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.service.UpdateIndicesService; +import com.linkedin.mxe.MetadataChangeLog; +import com.linkedin.upgrade.DataHubUpgradeState; +import
io.datahubproject.metadata.context.ActorContext; +import io.datahubproject.metadata.context.OperationContext; +import io.ebean.Database; +import java.io.IOException; +import java.sql.Timestamp; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.UUID; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class LoadIndicesStepTest { + + private LoadIndicesStep loadIndicesStep; + private Database database; + @Mock private EntityService mockEntityService; + @Mock private UpdateIndicesService mockUpdateIndicesService; + @Mock private LoadIndicesIndexManager mockIndexManager; + @Mock private UpgradeContext mockUpgradeContext; + @Mock private UpgradeReport mockUpgradeReport; + @Mock private OperationContext mockOperationContext; + @Mock private EntityRegistry mockEntityRegistry; + @Mock private ActorContext mockActorContext; + + @BeforeMethod + public void setup() { + MockitoAnnotations.openMocks(this); + + // Create a real H2 in-memory database for testing with a unique name to avoid conflicts + String instanceId = "loadindices_" + UUID.randomUUID().toString().replace("-", ""); + String serverName = "loadindices_test_" + UUID.randomUUID().toString().replace("-", ""); + database = EbeanTestUtils.createNamedTestServer(instanceId, serverName); + + // Setup test database with some sample data + setupTestDatabase(); + + loadIndicesStep = + new LoadIndicesStep( + database, mockEntityService, mockUpdateIndicesService, mockIndexManager); + + when(mockUpgradeContext.report()).thenReturn(mockUpgradeReport); + when(mockUpgradeContext.opContext()).thenReturn(mockOperationContext); + when(mockOperationContext.getEntityRegistry()).thenReturn(mockEntityRegistry); + 
when(mockOperationContext.getActorContext()).thenReturn(mockActorContext); + + // Mock authentication context + com.datahub.authentication.Authentication mockAuth = + mock(com.datahub.authentication.Authentication.class); + com.datahub.authentication.Actor mockActor = mock(com.datahub.authentication.Actor.class); + when(mockActorContext.getAuthentication()).thenReturn(mockAuth); + when(mockAuth.getActor()).thenReturn(mockActor); + when(mockActor.toUrnStr()).thenReturn("urn:li:corpuser:testUser"); + } + + @AfterMethod + public void cleanup() { + if (database != null) { + database.shutdown(); + } + } + + private void setupTestDatabase() { + // Insert a few test rows to simulate real data + insertTestRow( + "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleDataset1,PROD)", + "container", + 0, + Instant.now(), + "testUser"); + insertTestRow( + "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleDataset2,PROD)", + "container", + 0, + Instant.now(), + "testUser"); + insertTestRow( + "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleDataset3,PROD)", + "ownership", + 0, + Instant.now(), + "testUser"); + } + + private void insertTestRow( + String urn, String aspect, int version, Instant createdTime, String createdBy) { + EbeanAspectV2 aspectV2 = new EbeanAspectV2(); + aspectV2.setUrn(urn); + aspectV2.setAspect(aspect); + aspectV2.setVersion(version); + aspectV2.setMetadata("{}"); // Required field + aspectV2.setCreatedOn(Timestamp.from(createdTime)); + aspectV2.setCreatedBy(createdBy); + database.save(aspectV2); + } + + private void insertTestRowWithValidData( + String urn, String aspect, int version, Instant createdTime, String createdBy) { + EbeanAspectV2 aspectV2 = new EbeanAspectV2(); + aspectV2.setUrn(urn); + aspectV2.setAspect(aspect); + aspectV2.setVersion(version); + // Provide valid container aspect JSON data + aspectV2.setMetadata("{\"container\":{\"urn\":\"" + urn + "\"}}"); + aspectV2.setCreatedOn(Timestamp.from(createdTime)); + aspectV2.setCreatedBy(createdBy); + 
database.save(aspectV2); + } + + @Test + public void testId() { + assertEquals(loadIndicesStep.id(), "LoadIndicesStep"); + } + + @Test + public void testRetryCount() { + assertEquals(loadIndicesStep.retryCount(), 0); + } + + @Test + public void testExecutableSuccess() throws IOException { + var executable = loadIndicesStep.executable(); + assertNotNull(executable); + + // Mock successful index manager operations + doNothing().when(mockIndexManager).optimizeForBulkOperations(); + when(mockIndexManager.isSettingsOptimized()).thenReturn(true); + doNothing().when(mockIndexManager).restoreFromConfiguration(); + + // Execute the step + UpgradeStepResult result = executable.apply(mockUpgradeContext); + + assertNotNull(result); + assertEquals(result.stepId(), "LoadIndicesStep"); + assertTrue(result.result() == DataHubUpgradeState.SUCCEEDED); + + // Verify index manager was called + verify(mockIndexManager, times(1)).optimizeForBulkOperations(); + verify(mockIndexManager, times(1)).restoreFromConfiguration(); + } + + @Test + public void testExecutableWithIndexManagerDisableFailure() throws IOException { + var executable = loadIndicesStep.executable(); + assertNotNull(executable); + + // Mock index manager disable failure + doThrow(new IOException("Failed to optimize settings")) + .when(mockIndexManager) + .optimizeForBulkOperations(); + + // Execute the step + UpgradeStepResult result = executable.apply(mockUpgradeContext); + + assertNotNull(result); + assertEquals(result.stepId(), "LoadIndicesStep"); + assertTrue(result.result() == DataHubUpgradeState.FAILED); + + // Verify index manager was called + verify(mockIndexManager, times(1)).optimizeForBulkOperations(); + verify(mockIndexManager, never()).restoreFromConfiguration(); + } + + @Test + public void testExecutableWithIndexManagerRestoreFailure() throws IOException { + var executable = loadIndicesStep.executable(); + assertNotNull(executable); + + // Mock successful optimization but failed restore + 
doNothing().when(mockIndexManager).optimizeForBulkOperations(); + when(mockIndexManager.isSettingsOptimized()).thenReturn(true); + doThrow(new IOException("Failed to restore settings")) + .when(mockIndexManager) + .restoreFromConfiguration(); + + // Execute the step + UpgradeStepResult result = executable.apply(mockUpgradeContext); + + assertNotNull(result); + assertEquals(result.stepId(), "LoadIndicesStep"); + // The step should still succeed even if restore fails, as it logs the error but doesn't fail + // the step + assertTrue(result.result() == DataHubUpgradeState.SUCCEEDED); + + // Verify index manager was called + verify(mockIndexManager, times(1)).optimizeForBulkOperations(); + verify(mockIndexManager, times(1)).restoreFromConfiguration(); + } + + @Test + public void testProcessAllDataDirectly() throws Exception { + // Test the processAllDataDirectly method directly + // This method processes data from the database and calls updateIndicesService + + // Mock the updateIndicesService to not throw exceptions + doNothing().when(mockUpdateIndicesService).handleChangeEvents(any(), any()); + + // Create test args + LoadIndicesArgs args = new LoadIndicesArgs(); + args.batchSize = 100; + args.limit = 10; + args.aspectNames = java.util.List.of("container", "ownership"); + + // Call the method via reflection since it's private - it takes 3 parameters + var method = + LoadIndicesStep.class.getDeclaredMethod( + "processAllDataDirectly", + OperationContext.class, + LoadIndicesArgs.class, + java.util.function.Function.class); + method.setAccessible(true); + + // This should complete successfully with our test data + method.invoke( + loadIndicesStep, + mockOperationContext, + args, + (java.util.function.Function) msg -> null); + + // Verify that updateIndicesService was called - it calls flush() instead of + // handleChangeEvents() + verify(mockUpdateIndicesService, atLeastOnce()).flush(); + } + + @Test + public void testGetDefaultAspectNames() throws Exception { + // Test 
the getDefaultAspectNames method + var method = + LoadIndicesStep.class.getDeclaredMethod("getDefaultAspectNames", OperationContext.class); + method.setAccessible(true); + + // Mock entity registry to return some entity specs + Map entitySpecs = new HashMap<>(); + when(mockEntityRegistry.getEntitySpecs()).thenReturn(entitySpecs); + + @SuppressWarnings("unchecked") + Set result = (Set) method.invoke(loadIndicesStep, mockOperationContext); + + assertNotNull(result); + // Result should be empty since we have no entity specs + assertTrue(result.isEmpty()); + } + + @Test(expectedExceptions = RuntimeException.class) + public void testGetDefaultAspectNamesWithNullOperationContext() { + // Test with null operation context - should throw NullPointerException wrapped in + // RuntimeException + try { + var method = + LoadIndicesStep.class.getDeclaredMethod("getDefaultAspectNames", OperationContext.class); + method.setAccessible(true); + method.invoke(loadIndicesStep, (OperationContext) null); + fail("Expected RuntimeException to be thrown"); + } catch (Exception e) { + // The actual exception will be wrapped in InvocationTargetException + if (e.getCause() instanceof RuntimeException) { + throw (RuntimeException) e.getCause(); + } + throw new RuntimeException(e); + } + } + + @Test(expectedExceptions = RuntimeException.class) + public void testGetDefaultAspectNamesWithNullEntityRegistry() { + // Test with null entity registry - should throw NullPointerException wrapped in + // RuntimeException + when(mockOperationContext.getEntityRegistry()).thenReturn(null); + + try { + var method = + LoadIndicesStep.class.getDeclaredMethod("getDefaultAspectNames", OperationContext.class); + method.setAccessible(true); + method.invoke(loadIndicesStep, mockOperationContext); + fail("Expected RuntimeException to be thrown"); + } catch (Exception e) { + // The actual exception will be wrapped in InvocationTargetException + if (e.getCause() instanceof RuntimeException) { + throw (RuntimeException) 
e.getCause(); + } + throw new RuntimeException(e); + } + } + + @Test + public void testContainsKey() throws Exception { + // Test the containsKey method + var method = LoadIndicesStep.class.getDeclaredMethod("containsKey", Map.class, String.class); + method.setAccessible(true); + + Map<String, Optional<String>> testMap = new HashMap<>(); + testMap.put("key1", Optional.of("value1")); + testMap.put("key2", Optional.of("value2")); + testMap.put("key3", Optional.empty()); + + // Test with existing key that has a value + boolean result1 = (Boolean) method.invoke(loadIndicesStep, testMap, "key1"); + assertTrue(result1); + + // Test with existing key that has empty optional + boolean result2 = (Boolean) method.invoke(loadIndicesStep, testMap, "key3"); + assertFalse(result2); + + // Test with non-existing key + boolean result3 = (Boolean) method.invoke(loadIndicesStep, testMap, "key4"); + assertFalse(result3); + + // Test with null map - this should throw NullPointerException + try { + method.invoke(loadIndicesStep, null, "key1"); + fail("Expected NullPointerException for null map"); + } catch (Exception e) { + assertTrue(e.getCause() instanceof NullPointerException); + } + } + + @Test + public void testGetArgs() throws Exception { + // Test the getArgs method + var method = LoadIndicesStep.class.getDeclaredMethod("getArgs", UpgradeContext.class); + method.setAccessible(true); + + // Test with empty parsed args + Map<String, Optional<String>> parsedArgs = new HashMap<>(); + when(mockUpgradeContext.parsedArgs()).thenReturn(parsedArgs); + + @SuppressWarnings("unchecked") + LoadIndicesArgs result = (LoadIndicesArgs) method.invoke(loadIndicesStep, mockUpgradeContext); + + assertNotNull(result); + assertEquals(result.batchSize, 10000); // Default batch size + assertEquals(result.limit, Integer.MAX_VALUE); // Default limit + } + + @Test + public void testGetArgsWithAllArguments() throws Exception { + var method = LoadIndicesStep.class.getDeclaredMethod("getArgs", UpgradeContext.class); + method.setAccessible(true); + + // Test
with all arguments provided + Map<String, Optional<String>> parsedArgs = new HashMap<>(); + parsedArgs.put(LoadIndices.BATCH_SIZE_ARG_NAME, Optional.of("5000")); + parsedArgs.put(LoadIndices.LIMIT_ARG_NAME, Optional.of("1000")); + parsedArgs.put(LoadIndices.URN_LIKE_ARG_NAME, Optional.of("urn:li:dataset:%")); + parsedArgs.put(LoadIndices.LE_PIT_EPOCH_MS_ARG_NAME, Optional.of("1640995200000")); + parsedArgs.put(LoadIndices.GE_PIT_EPOCH_MS_ARG_NAME, Optional.of("1640908800000")); + parsedArgs.put(LoadIndices.ASPECT_NAMES_ARG_NAME, Optional.of("container,ownership")); + parsedArgs.put(LoadIndices.LAST_URN_ARG_NAME, Optional.of("urn:li:dataset:test")); + + when(mockUpgradeContext.parsedArgs()).thenReturn(parsedArgs); + + @SuppressWarnings("unchecked") + LoadIndicesArgs result = (LoadIndicesArgs) method.invoke(loadIndicesStep, mockUpgradeContext); + + assertNotNull(result); + assertEquals(result.batchSize, 5000); + assertEquals(result.limit, 1000); + assertEquals(result.urnLike, "urn:li:dataset:%"); + assertEquals(result.lePitEpochMs, Long.valueOf(1640995200000L)); + assertEquals(result.gePitEpochMs, Long.valueOf(1640908800000L)); + assertEquals(result.aspectNames, Arrays.asList("container", "ownership")); + assertEquals(result.lastUrn, "urn:li:dataset:test"); + + // Verify report messages were added + verify(mockUpgradeReport, atLeastOnce()).addLine(anyString()); + } + + @Test + public void testGetArgsWithLimitMaxValue() throws Exception { + var method = LoadIndicesStep.class.getDeclaredMethod("getArgs", UpgradeContext.class); + method.setAccessible(true); + + Map<String, Optional<String>> parsedArgs = new HashMap<>(); + parsedArgs.put(LoadIndices.LIMIT_ARG_NAME, Optional.of(String.valueOf(Integer.MAX_VALUE))); + when(mockUpgradeContext.parsedArgs()).thenReturn(parsedArgs); + + @SuppressWarnings("unchecked") + LoadIndicesArgs result = (LoadIndicesArgs) method.invoke(loadIndicesStep, mockUpgradeContext); + + assertEquals(result.limit, Integer.MAX_VALUE); + verify(mockUpgradeReport).addLine("limit is not applied
(processing all matching records)"); + } + + @Test + public void testGetArgsWithDefaultAspectNames() throws Exception { + var method = LoadIndicesStep.class.getDeclaredMethod("getArgs", UpgradeContext.class); + method.setAccessible(true); + + // Mock entity registry whose only aspect has no searchable fields + Map entitySpecs = new HashMap<>(); + EntitySpec mockEntitySpec = mock(EntitySpec.class); + AspectSpec mockAspectSpec = mock(AspectSpec.class); + + when(mockEntitySpec.getAspectSpecs()).thenReturn(Arrays.asList(mockAspectSpec)); + when(mockAspectSpec.getSearchableFieldSpecs()) + .thenReturn(Arrays.asList()); // Empty list for simplicity + when(mockAspectSpec.getName()).thenReturn("container"); + when(mockEntitySpec.getKeyAspectName()).thenReturn("datasetKey"); + + entitySpecs.put("dataset", mockEntitySpec); + when(mockEntityRegistry.getEntitySpecs()).thenReturn(entitySpecs); + + Map<String, Optional<String>> parsedArgs = new HashMap<>(); + when(mockUpgradeContext.parsedArgs()).thenReturn(parsedArgs); + + @SuppressWarnings("unchecked") + LoadIndicesArgs result = (LoadIndicesArgs) method.invoke(loadIndicesStep, mockUpgradeContext); + + assertNotNull(result.aspectNames); + // Since we have no searchable aspects, the result should be empty + assertTrue(result.aspectNames.isEmpty()); + } + + @Test + public void testGetBatchSize() throws Exception { + var method = LoadIndicesStep.class.getDeclaredMethod("getBatchSize", Map.class); + method.setAccessible(true); + + Map<String, Optional<String>> parsedArgs = new HashMap<>(); + parsedArgs.put(LoadIndices.BATCH_SIZE_ARG_NAME, Optional.of("5000")); + + int result = (Integer) method.invoke(loadIndicesStep, parsedArgs); + assertEquals(result, 5000); + + // Test with empty args (should return default) + Map<String, Optional<String>> emptyArgs = new HashMap<>(); + int defaultResult = (Integer) method.invoke(loadIndicesStep, emptyArgs); + assertEquals(defaultResult, 10000); + } + + @Test + public void testGetLimit() throws Exception { + var method = LoadIndicesStep.class.getDeclaredMethod("getLimit", Map.class); +
method.setAccessible(true); + + Map<String, Optional<String>> parsedArgs = new HashMap<>(); + parsedArgs.put(LoadIndices.LIMIT_ARG_NAME, Optional.of("1000")); + + int result = (Integer) method.invoke(loadIndicesStep, parsedArgs); + assertEquals(result, 1000); + + // Test with empty args (should return default) + Map<String, Optional<String>> emptyArgs = new HashMap<>(); + int defaultResult = (Integer) method.invoke(loadIndicesStep, emptyArgs); + assertEquals(defaultResult, Integer.MAX_VALUE); + } + + @Test + public void testGetInt() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod("getInt", Map.class, int.class, String.class); + method.setAccessible(true); + + Map<String, Optional<String>> parsedArgs = new HashMap<>(); + parsedArgs.put("testKey", Optional.of("42")); + + int result = (Integer) method.invoke(loadIndicesStep, parsedArgs, 10, "testKey"); + assertEquals(result, 42); + + // Test with missing key (should return default) + int defaultResult = (Integer) method.invoke(loadIndicesStep, parsedArgs, 10, "missingKey"); + assertEquals(defaultResult, 10); + } + + @Test + public void testConvertToRestoreIndicesArgs() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "convertToRestoreIndicesArgs", LoadIndicesArgs.class, int.class); + method.setAccessible(true); + + LoadIndicesArgs args = new LoadIndicesArgs(); + args.aspectNames = Arrays.asList("container", "ownership"); + args.urnLike = "urn:li:dataset:%"; + args.gePitEpochMs = 1640908800000L; + args.lePitEpochMs = 1640995200000L; + args.lastUrn = "urn:li:dataset:test"; + + RestoreIndicesArgs result = (RestoreIndicesArgs) method.invoke(loadIndicesStep, args, 1000); + + assertNotNull(result); + assertEquals(result.aspectNames, Arrays.asList("container", "ownership")); + assertEquals(result.urnLike, "urn:li:dataset:%"); + assertEquals(result.gePitEpochMs, Long.valueOf(1640908800000L)); + assertEquals(result.lePitEpochMs, Long.valueOf(1640995200000L)); + assertEquals(result.limit, 1000); + assertTrue(result.urnBasedPagination); +
assertEquals(result.lastUrn, "urn:li:dataset:test"); + } + + @Test + public void testConvertToRestoreIndicesArgsWithNulls() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "convertToRestoreIndicesArgs", LoadIndicesArgs.class, int.class); + method.setAccessible(true); + + LoadIndicesArgs args = new LoadIndicesArgs(); + // All fields are null + + RestoreIndicesArgs result = (RestoreIndicesArgs) method.invoke(loadIndicesStep, args, 1000); + + assertNotNull(result); + assertTrue(result.aspectNames == null || result.aspectNames.isEmpty()); + assertTrue(result.urnLike == null); + assertEquals(result.gePitEpochMs, Long.valueOf(0L)); + assertTrue(result.lePitEpochMs > 0); // Should be current time + assertEquals(result.limit, 1000); + assertFalse(result.urnBasedPagination); + assertTrue(result.lastUrn == null || result.lastUrn.isEmpty()); + } + + @Test + public void testConvertToMetadataChangeLog() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "convertToMetadataChangeLog", OperationContext.class, EbeanAspectV2.class); + method.setAccessible(true); + + // Create test aspect with proper key + EbeanAspectV2 aspect = new EbeanAspectV2(); + EbeanAspectV2.PrimaryKey key = + new EbeanAspectV2.PrimaryKey( + "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleDataset,PROD)", "container", 0); + aspect.setKey(key); + aspect.setMetadata("{\"container\":{\"urn\":\"urn:li:container:test\"}}"); + aspect.setCreatedOn(Timestamp.from(Instant.now())); + aspect.setCreatedBy("testUser"); + + // Mock entity registry + EntitySpec mockEntitySpec = mock(EntitySpec.class); + AspectSpec mockAspectSpec = mock(AspectSpec.class); + when(mockEntitySpec.getAspectSpec("container")).thenReturn(mockAspectSpec); + when(mockAspectSpec.getDataTemplateClass()) + .thenReturn((Class) com.linkedin.container.Container.class); + when(mockEntityRegistry.getEntitySpec("dataset")).thenReturn(mockEntitySpec); + + MetadataChangeLog result = + (MetadataChangeLog) 
method.invoke(loadIndicesStep, mockOperationContext, aspect); + + assertNotNull(result); + assertEquals(result.getEntityType(), "dataset"); + assertEquals(result.getChangeType(), ChangeType.RESTATE); + assertEquals(result.getAspectName(), "container"); + } + + @Test + public void testConvertToMetadataChangeLogWithSystemMetadata() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "convertToMetadataChangeLog", OperationContext.class, EbeanAspectV2.class); + method.setAccessible(true); + + // Create test aspect with proper key and system metadata + EbeanAspectV2 aspect = new EbeanAspectV2(); + EbeanAspectV2.PrimaryKey key = + new EbeanAspectV2.PrimaryKey( + "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleDataset,PROD)", "container", 0); + aspect.setKey(key); + aspect.setMetadata("{\"container\":{\"urn\":\"urn:li:container:test\"}}"); + aspect.setCreatedOn(Timestamp.from(Instant.now())); + aspect.setCreatedBy("testUser"); + + // Set system metadata to test the systemMetadata handling + aspect.setSystemMetadata("{\"lastObserved\":1234567890,\"runId\":\"test-run-id\"}"); + + // Mock entity registry + EntitySpec mockEntitySpec = mock(EntitySpec.class); + AspectSpec mockAspectSpec = mock(AspectSpec.class); + when(mockEntitySpec.getAspectSpec("container")).thenReturn(mockAspectSpec); + when(mockAspectSpec.getDataTemplateClass()) + .thenReturn((Class) com.linkedin.container.Container.class); + when(mockEntityRegistry.getEntitySpec("dataset")).thenReturn(mockEntitySpec); + + MetadataChangeLog result = + (MetadataChangeLog) method.invoke(loadIndicesStep, mockOperationContext, aspect); + + assertNotNull(result); + assertEquals(result.getEntityType(), "dataset"); + assertEquals(result.getChangeType(), ChangeType.RESTATE); + assertEquals(result.getAspectName(), "container"); + + // Verify that systemMetadata was properly set + assertNotNull(result.getSystemMetadata()); + assertEquals(result.getSystemMetadata().getLastObserved(), 1234567890L); + 
assertEquals(result.getSystemMetadata().getRunId(), "test-run-id"); + } + + @Test + public void testConvertToMetadataChangeLogWithNullSystemMetadata() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "convertToMetadataChangeLog", OperationContext.class, EbeanAspectV2.class); + method.setAccessible(true); + + // Create test aspect with proper key but no system metadata + EbeanAspectV2 aspect = new EbeanAspectV2(); + EbeanAspectV2.PrimaryKey key = + new EbeanAspectV2.PrimaryKey( + "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleDataset,PROD)", "container", 0); + aspect.setKey(key); + aspect.setMetadata("{\"container\":{\"urn\":\"urn:li:container:test\"}}"); + aspect.setCreatedOn(Timestamp.from(Instant.now())); + aspect.setCreatedBy("testUser"); + + // Explicitly set systemMetadata to null to test the null handling + aspect.setSystemMetadata(null); + + // Mock entity registry + EntitySpec mockEntitySpec = mock(EntitySpec.class); + AspectSpec mockAspectSpec = mock(AspectSpec.class); + when(mockEntitySpec.getAspectSpec("container")).thenReturn(mockAspectSpec); + when(mockAspectSpec.getDataTemplateClass()) + .thenReturn((Class) com.linkedin.container.Container.class); + when(mockEntityRegistry.getEntitySpec("dataset")).thenReturn(mockEntitySpec); + + MetadataChangeLog result = + (MetadataChangeLog) method.invoke(loadIndicesStep, mockOperationContext, aspect); + + assertNotNull(result); + assertEquals(result.getEntityType(), "dataset"); + assertEquals(result.getChangeType(), ChangeType.RESTATE); + assertEquals(result.getAspectName(), "container"); + + // Verify that systemMetadata is null when input is null + assertNull(result.getSystemMetadata()); + } + + @Test(expectedExceptions = Exception.class) + public void testConvertToMetadataChangeLogWithInvalidUrn() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "convertToMetadataChangeLog", OperationContext.class, EbeanAspectV2.class); + method.setAccessible(true); + 
+ // Create test aspect with invalid URN + EbeanAspectV2 aspect = new EbeanAspectV2(); + aspect.setUrn("invalid-urn"); + aspect.setAspect("container"); + aspect.setVersion(0); + aspect.setMetadata("{}"); + aspect.setCreatedOn(Timestamp.from(Instant.now())); + aspect.setCreatedBy("testUser"); + + method.invoke(loadIndicesStep, mockOperationContext, aspect); + } + + @Test + public void testWriteBatchWithRetrySuccess() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "writeBatchWithRetry", + OperationContext.class, + List.class, + LoadIndicesResult.class, + java.util.function.Function.class); + method.setAccessible(true); + + List batch = new ArrayList<>(); + batch.add(mock(MetadataChangeLog.class)); + + LoadIndicesResult result = new LoadIndicesResult(); + + doNothing().when(mockUpdateIndicesService).handleChangeEvents(any(), any()); + + method.invoke( + loadIndicesStep, + mockOperationContext, + batch, + result, + (java.util.function.Function) msg -> null); + + verify(mockUpdateIndicesService, times(1)).handleChangeEvents(any(), any()); + assertEquals(result.ignored, 0); + } + + @Test + public void testWriteBatchWithRetryFailureAndSplit() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "writeBatchWithRetry", + OperationContext.class, + List.class, + LoadIndicesResult.class, + java.util.function.Function.class); + method.setAccessible(true); + + List batch = new ArrayList<>(); + batch.add(mock(MetadataChangeLog.class)); + batch.add(mock(MetadataChangeLog.class)); + batch.add(mock(MetadataChangeLog.class)); + batch.add(mock(MetadataChangeLog.class)); + + LoadIndicesResult result = new LoadIndicesResult(); + + // First call fails, second succeeds + doThrow(new RuntimeException("First attempt fails")) + .doNothing() + .when(mockUpdateIndicesService) + .handleChangeEvents(any(), any()); + + method.invoke( + loadIndicesStep, + mockOperationContext, + batch, + result, + (java.util.function.Function) msg -> null); 
+ + // Should have been called multiple times due to retry and split + verify(mockUpdateIndicesService, atLeast(2)).handleChangeEvents(any(), any()); + } + + @Test + public void testWriteBatchWithRetryMaxRetriesExceeded() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "writeBatchWithRetry", + OperationContext.class, + List.class, + LoadIndicesResult.class, + java.util.function.Function.class); + method.setAccessible(true); + + List batch = new ArrayList<>(); + batch.add(mock(MetadataChangeLog.class)); + batch.add(mock(MetadataChangeLog.class)); + batch.add(mock(MetadataChangeLog.class)); + batch.add(mock(MetadataChangeLog.class)); + + LoadIndicesResult result = new LoadIndicesResult(); + + // Always fail + doThrow(new RuntimeException("Always fails")) + .when(mockUpdateIndicesService) + .handleChangeEvents(any(), any()); + + method.invoke( + loadIndicesStep, + mockOperationContext, + batch, + result, + (java.util.function.Function) msg -> null); + + // Should have been called multiple times due to retries and batch splitting + verify(mockUpdateIndicesService, atLeast(4)).handleChangeEvents(any(), any()); + assertEquals(result.ignored, 4); // All items should be marked as ignored after max retries + } + + @Test + public void testWriteBatchWithRetrySuccessAfterRetries() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "writeBatchWithRetry", + OperationContext.class, + List.class, + LoadIndicesResult.class, + java.util.function.Function.class); + method.setAccessible(true); + + List batch = new ArrayList<>(); + batch.add(mock(MetadataChangeLog.class)); + batch.add(mock(MetadataChangeLog.class)); + + LoadIndicesResult result = new LoadIndicesResult(); + + // Fail first 2 times, succeed on 3rd attempt + doThrow(new RuntimeException("Fail attempt 1")) + .doThrow(new RuntimeException("Fail attempt 2")) + .doNothing() + .when(mockUpdateIndicesService) + .handleChangeEvents(any(), any()); + + method.invoke( + 
loadIndicesStep, + mockOperationContext, + batch, + result, + (java.util.function.Function) msg -> null); + + // Should have been called 3 times (2 failures + 1 success) + verify(mockUpdateIndicesService, times(3)).handleChangeEvents(any(), any()); + assertEquals(result.ignored, 1); // First half of split batch gets ignored, second half succeeds + } + + @Test + public void testWriteBatchWithRetryMaxRetriesExceededWithSmallBatch() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "writeBatchWithRetry", + OperationContext.class, + List.class, + LoadIndicesResult.class, + java.util.function.Function.class); + method.setAccessible(true); + + List batch = new ArrayList<>(); + batch.add(mock(MetadataChangeLog.class)); + batch.add(mock(MetadataChangeLog.class)); + + LoadIndicesResult result = new LoadIndicesResult(); + + // Always fail - should exceed max retries (3) and split batch + doThrow(new RuntimeException("Always fails")) + .when(mockUpdateIndicesService) + .handleChangeEvents(any(), any()); + + method.invoke( + loadIndicesStep, + mockOperationContext, + batch, + result, + (java.util.function.Function) msg -> null); + + // Should have been called 3 times: initial attempt + 2 retries before exceeding max retries + verify(mockUpdateIndicesService, times(3)).handleChangeEvents(any(), any()); + assertEquals(result.ignored, 2); // Both items should be marked as ignored after max retries + } + + @Test + public void testWriteBatchWithRetryBatchSplitting() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "writeBatchWithRetry", + OperationContext.class, + List.class, + LoadIndicesResult.class, + java.util.function.Function.class); + method.setAccessible(true); + + List batch = new ArrayList<>(); + batch.add(mock(MetadataChangeLog.class)); + batch.add(mock(MetadataChangeLog.class)); + batch.add(mock(MetadataChangeLog.class)); + batch.add(mock(MetadataChangeLog.class)); + + LoadIndicesResult result = new 
LoadIndicesResult(); + + // Fail on full batch, succeed on split batches + doThrow(new RuntimeException("Full batch fails")) + .doNothing() // First half succeeds + .doNothing() // Second half succeeds + .when(mockUpdateIndicesService) + .handleChangeEvents(any(), any()); + + method.invoke( + loadIndicesStep, + mockOperationContext, + batch, + result, + (java.util.function.Function) msg -> null); + + // Should have been called 3 times: 1 full batch failure + 2 successful split batches + verify(mockUpdateIndicesService, times(3)).handleChangeEvents(any(), any()); + assertEquals(result.ignored, 0); // No items should be ignored since splits succeeded + } + + @Test + public void testWriteBatchWithRetryFirstHalfFailure() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "writeBatchWithRetry", + OperationContext.class, + List.class, + LoadIndicesResult.class, + java.util.function.Function.class); + method.setAccessible(true); + + List batch = new ArrayList<>(); + batch.add(mock(MetadataChangeLog.class)); + batch.add(mock(MetadataChangeLog.class)); + batch.add(mock(MetadataChangeLog.class)); + batch.add(mock(MetadataChangeLog.class)); + + LoadIndicesResult result = new LoadIndicesResult(); + + // Fail on full batch and first half, succeed on second half + doThrow(new RuntimeException("Full batch fails")) + .doThrow(new RuntimeException("First half fails")) + .doNothing() // Second half succeeds + .when(mockUpdateIndicesService) + .handleChangeEvents(any(), any()); + + method.invoke( + loadIndicesStep, + mockOperationContext, + batch, + result, + (java.util.function.Function) msg -> null); + + // Should have been called 3 times: 1 full batch failure + 1 first half failure + 1 second half + // success + verify(mockUpdateIndicesService, times(3)).handleChangeEvents(any(), any()); + assertEquals(result.ignored, 2); // First half (2 items) should be ignored + } + + @Test + public void testWriteBatchWithRetrySingleItemFailure() throws Exception { + 
var method = + LoadIndicesStep.class.getDeclaredMethod( + "writeBatchWithRetry", + OperationContext.class, + List.class, + LoadIndicesResult.class, + java.util.function.Function.class); + method.setAccessible(true); + + List batch = new ArrayList<>(); + batch.add(mock(MetadataChangeLog.class)); + + LoadIndicesResult result = new LoadIndicesResult(); + + // Always fail + doThrow(new RuntimeException("Always fails")) + .when(mockUpdateIndicesService) + .handleChangeEvents(any(), any()); + + method.invoke( + loadIndicesStep, + mockOperationContext, + batch, + result, + (java.util.function.Function) msg -> null); + + assertEquals( + result.ignored, + 1); // Single item should be marked as ignored when it can't be split further + } + + @Test + public void testProcessAllDataDirectlyWithConversionErrors() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "processAllDataDirectly", + OperationContext.class, + LoadIndicesArgs.class, + java.util.function.Function.class); + method.setAccessible(true); + + // Mock the updateIndicesService to not throw exceptions + doNothing().when(mockUpdateIndicesService).handleChangeEvents(any(), any()); + + // Create test args with limit + LoadIndicesArgs args = new LoadIndicesArgs(); + args.batchSize = 100; + args.limit = 10; + args.aspectNames = java.util.List.of("container", "ownership"); + + // Add a test row with invalid metadata to cause conversion error + insertTestRow( + "urn:li:dataset:(urn:li:dataPlatform:hdfs,InvalidDataset,PROD)", + "container", + 0, + Instant.now(), + "testUser"); + + // Call the method + Object result = + method.invoke( + loadIndicesStep, + mockOperationContext, + args, + (java.util.function.Function) msg -> null); + + assertNotNull(result); + verify(mockUpdateIndicesService, atLeastOnce()).flush(); + } + + @Test + public void testProcessAllDataDirectlyWithLimit() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "processAllDataDirectly", + 
OperationContext.class, + LoadIndicesArgs.class, + java.util.function.Function.class); + method.setAccessible(true); + + doNothing().when(mockUpdateIndicesService).handleChangeEvents(any(), any()); + + LoadIndicesArgs args = new LoadIndicesArgs(); + args.batchSize = 100; + args.limit = 1; // Very small limit + args.aspectNames = java.util.List.of("container"); + + Object result = + method.invoke( + loadIndicesStep, + mockOperationContext, + args, + (java.util.function.Function) msg -> null); + + assertNotNull(result); + verify(mockUpdateIndicesService, atLeastOnce()).flush(); + } + + @Test + public void testProcessAllDataDirectlyWithUrnLike() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "processAllDataDirectly", + OperationContext.class, + LoadIndicesArgs.class, + java.util.function.Function.class); + method.setAccessible(true); + + doNothing().when(mockUpdateIndicesService).handleChangeEvents(any(), any()); + + LoadIndicesArgs args = new LoadIndicesArgs(); + args.batchSize = 100; + args.limit = 10; + args.urnLike = "urn:li:dataset:%"; + args.aspectNames = java.util.List.of("container"); + + Object result = + method.invoke( + loadIndicesStep, + mockOperationContext, + args, + (java.util.function.Function) msg -> null); + + assertNotNull(result); + verify(mockUpdateIndicesService, atLeastOnce()).flush(); + } + + @Test + public void testProcessAllDataDirectlyWithTimeFilters() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "processAllDataDirectly", + OperationContext.class, + LoadIndicesArgs.class, + java.util.function.Function.class); + method.setAccessible(true); + + doNothing().when(mockUpdateIndicesService).handleChangeEvents(any(), any()); + + LoadIndicesArgs args = new LoadIndicesArgs(); + args.batchSize = 100; + args.limit = 10; + args.gePitEpochMs = Instant.now().minusSeconds(3600).toEpochMilli(); // 1 hour ago + args.lePitEpochMs = Instant.now().toEpochMilli(); // now + args.aspectNames = 
java.util.List.of("container"); + + Object result = + method.invoke( + loadIndicesStep, + mockOperationContext, + args, + (java.util.function.Function) msg -> null); + + assertNotNull(result); + verify(mockUpdateIndicesService, atLeastOnce()).flush(); + } + + @Test + public void testProcessAllDataDirectlyWithLastUrn() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "processAllDataDirectly", + OperationContext.class, + LoadIndicesArgs.class, + java.util.function.Function.class); + method.setAccessible(true); + + doNothing().when(mockUpdateIndicesService).handleChangeEvents(any(), any()); + + LoadIndicesArgs args = new LoadIndicesArgs(); + args.batchSize = 100; + args.limit = 10; + args.lastUrn = "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleDataset1,PROD)"; + args.aspectNames = java.util.List.of("container"); + + Object result = + method.invoke( + loadIndicesStep, + mockOperationContext, + args, + (java.util.function.Function) msg -> null); + + assertNotNull(result); + verify(mockUpdateIndicesService, atLeastOnce()).flush(); + } + + @Test + public void testProcessAllDataDirectlyWithEmptyBatch() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "processAllDataDirectly", + OperationContext.class, + LoadIndicesArgs.class, + java.util.function.Function.class); + method.setAccessible(true); + + doNothing().when(mockUpdateIndicesService).handleChangeEvents(any(), any()); + + LoadIndicesArgs args = new LoadIndicesArgs(); + args.batchSize = 100; + args.limit = 10; + args.aspectNames = java.util.List.of("nonexistent"); // No matching aspects + + Object result = + method.invoke( + loadIndicesStep, + mockOperationContext, + args, + (java.util.function.Function) msg -> null); + + assertNotNull(result); + verify(mockUpdateIndicesService, atLeastOnce()).flush(); + } + + @Test + public void testProcessAllDataDirectlyWithFinalFlushFailure() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + 
"processAllDataDirectly", + OperationContext.class, + LoadIndicesArgs.class, + java.util.function.Function.class); + method.setAccessible(true); + + // Mock successful batch processing but fail on final flush + doNothing().when(mockUpdateIndicesService).handleChangeEvents(any(), any()); + doThrow(new RuntimeException("Flush failed")).when(mockUpdateIndicesService).flush(); + + LoadIndicesArgs args = new LoadIndicesArgs(); + args.batchSize = 100; + args.limit = 10; + args.aspectNames = java.util.List.of("container"); + + // Execute - should not throw exception despite flush failure + Object result = + method.invoke( + loadIndicesStep, + mockOperationContext, + args, + (java.util.function.Function) msg -> null); + + // Verify flush was called and failed gracefully + verify(mockUpdateIndicesService).flush(); + assertNotNull(result); + } + + @Test + public void testGetDefaultAspectNamesWithSearchableAspects() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod("getDefaultAspectNames", OperationContext.class); + method.setAccessible(true); + + // Create mock entity specs with searchable aspects + com.linkedin.metadata.models.EntitySpec mockEntitySpec1 = + mock(com.linkedin.metadata.models.EntitySpec.class); + com.linkedin.metadata.models.EntitySpec mockEntitySpec2 = + mock(com.linkedin.metadata.models.EntitySpec.class); + + // Create mock aspect specs + com.linkedin.metadata.models.AspectSpec mockAspectSpec1 = + mock(com.linkedin.metadata.models.AspectSpec.class); + com.linkedin.metadata.models.AspectSpec mockAspectSpec2 = + mock(com.linkedin.metadata.models.AspectSpec.class); + com.linkedin.metadata.models.AspectSpec mockKeyAspectSpec = + mock(com.linkedin.metadata.models.AspectSpec.class); + + // Create mock searchable field specs + com.linkedin.metadata.models.SearchableFieldSpec mockSearchableFieldSpec = + mock(com.linkedin.metadata.models.SearchableFieldSpec.class); + + // Setup entity registry mock + 
when(mockEntityRegistry.getEntitySpecs()) + .thenReturn( + java.util.Map.of( + "dataset", mockEntitySpec1, + "chart", mockEntitySpec2)); + + when(mockEntityRegistry.getEntitySpec("dataset")).thenReturn(mockEntitySpec1); + when(mockEntityRegistry.getEntitySpec("chart")).thenReturn(mockEntitySpec2); + + // Setup entity specs + when(mockEntitySpec1.getAspectSpecs()) + .thenReturn(java.util.List.of(mockAspectSpec1, mockKeyAspectSpec)); + when(mockEntitySpec2.getAspectSpecs()).thenReturn(java.util.List.of(mockAspectSpec2)); + + // Setup aspect specs - first has searchable fields, second doesn't + when(mockAspectSpec1.getName()).thenReturn("datasetProperties"); + when(mockAspectSpec1.getSearchableFieldSpecs()) + .thenReturn(java.util.List.of(mockSearchableFieldSpec)); + + when(mockAspectSpec2.getName()).thenReturn("chartInfo"); + when(mockAspectSpec2.getSearchableFieldSpecs()).thenReturn(java.util.List.of()); + + when(mockKeyAspectSpec.getName()).thenReturn("datasetKey"); + when(mockKeyAspectSpec.getSearchableFieldSpecs()).thenReturn(java.util.List.of()); + + // Setup key aspect names + when(mockEntitySpec1.getKeyAspectName()).thenReturn("datasetKey"); + when(mockEntitySpec2.getKeyAspectName()).thenReturn("chartKey"); + + // Execute method + @SuppressWarnings("unchecked") + java.util.Set result = + (java.util.Set) method.invoke(loadIndicesStep, mockOperationContext); + + // Verify results + assertNotNull(result); + assertTrue(result.contains("datasetProperties")); // Has searchable fields + assertTrue(result.contains("datasetKey")); // Key aspect included + assertFalse(result.contains("chartInfo")); // No searchable fields, not included + assertFalse(result.contains("chartKey")); // Key aspect not included since no searchable aspects + } + + @Test + public void testGetDefaultAspectNamesWithEntityProcessingError() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod("getDefaultAspectNames", OperationContext.class); + method.setAccessible(true); + + 
// Create mock entity specs + com.linkedin.metadata.models.EntitySpec mockEntitySpec1 = + mock(com.linkedin.metadata.models.EntitySpec.class); + com.linkedin.metadata.models.EntitySpec mockEntitySpec2 = + mock(com.linkedin.metadata.models.EntitySpec.class); + + // Setup entity registry mock + when(mockEntityRegistry.getEntitySpecs()) + .thenReturn( + java.util.Map.of( + "dataset", mockEntitySpec1, + "chart", mockEntitySpec2)); + + // First entity works fine + when(mockEntityRegistry.getEntitySpec("dataset")).thenReturn(mockEntitySpec1); + com.linkedin.metadata.models.AspectSpec mockAspectSpec1 = + mock(com.linkedin.metadata.models.AspectSpec.class); + when(mockEntitySpec1.getAspectSpecs()).thenReturn(java.util.List.of(mockAspectSpec1)); + when(mockAspectSpec1.getName()).thenReturn("datasetProperties"); + when(mockAspectSpec1.getSearchableFieldSpecs()) + .thenReturn( + java.util.List.of(mock(com.linkedin.metadata.models.SearchableFieldSpec.class))); + when(mockEntitySpec1.getKeyAspectName()).thenReturn("datasetKey"); + + // Second entity throws exception + when(mockEntityRegistry.getEntitySpec("chart")) + .thenThrow(new RuntimeException("Entity processing failed")); + + // Execute method - should not throw exception + @SuppressWarnings("unchecked") + java.util.Set result = + (java.util.Set) method.invoke(loadIndicesStep, mockOperationContext); + + // Verify results - should still include aspects from first entity + assertNotNull(result); + assertTrue(result.contains("datasetProperties")); + assertTrue(result.contains("datasetKey")); + } + + @Test + public void testGetDefaultAspectNamesWithNoSearchableAspects() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod("getDefaultAspectNames", OperationContext.class); + method.setAccessible(true); + + // Create mock entity spec with no searchable aspects + com.linkedin.metadata.models.EntitySpec mockEntitySpec = + mock(com.linkedin.metadata.models.EntitySpec.class); + 
com.linkedin.metadata.models.AspectSpec mockAspectSpec = + mock(com.linkedin.metadata.models.AspectSpec.class); + + // Setup entity registry mock + when(mockEntityRegistry.getEntitySpecs()) + .thenReturn(java.util.Map.of("dataset", mockEntitySpec)); + when(mockEntityRegistry.getEntitySpec("dataset")).thenReturn(mockEntitySpec); + + // Setup aspect spec with no searchable fields + when(mockEntitySpec.getAspectSpecs()).thenReturn(java.util.List.of(mockAspectSpec)); + when(mockAspectSpec.getName()).thenReturn("datasetProperties"); + when(mockAspectSpec.getSearchableFieldSpecs()).thenReturn(java.util.List.of()); + when(mockEntitySpec.getKeyAspectName()).thenReturn("datasetKey"); + + // Execute method + @SuppressWarnings("unchecked") + java.util.Set result = + (java.util.Set) method.invoke(loadIndicesStep, mockOperationContext); + + // Verify results - should be empty since no searchable aspects + assertNotNull(result); + assertTrue(result.isEmpty()); + } + + @Test + public void testProcessAllDataDirectlyWithNonEmptyBatchButAllConversionErrors() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "processAllDataDirectly", + OperationContext.class, + LoadIndicesArgs.class, + java.util.function.Function.class); + method.setAccessible(true); + + // Mock the updateIndicesService to not throw exceptions + doNothing().when(mockUpdateIndicesService).handleChangeEvents(any(), any()); + + // Create test args with small batch size to ensure we hit the batch processing logic + LoadIndicesArgs args = new LoadIndicesArgs(); + args.batchSize = 2; // Small batch size to trigger batch processing + args.limit = 10; + args.aspectNames = java.util.List.of("container", "ownership"); + + // Add test rows with invalid metadata to cause conversion errors + // This will create a batch that is not empty but all aspects fail conversion + insertTestRow( + "urn:li:dataset:(urn:li:dataPlatform:hdfs,InvalidDataset1,PROD)", + "container", + 0, + Instant.now(), + 
"testUser"); + + insertTestRow( + "urn:li:dataset:(urn:li:dataPlatform:hdfs,InvalidDataset2,PROD)", + "ownership", + 0, + Instant.now(), + "testUser"); + + // Add a valid row to ensure we have some data to process + insertTestRow( + "urn:li:dataset:(urn:li:dataPlatform:hdfs,ValidDataset,PROD)", + "container", + 0, + Instant.now(), + "testUser"); + + // Call the method + Object result = + method.invoke( + loadIndicesStep, + mockOperationContext, + args, + (java.util.function.Function) msg -> null); + + assertNotNull(result); + verify(mockUpdateIndicesService, atLeastOnce()).flush(); + + // Verify that the method completed successfully even with conversion errors + LoadIndicesResult loadResult = (LoadIndicesResult) result; + assertTrue(loadResult.ignored > 0); // Some aspects should be ignored due to conversion errors + } + + @Test + public void testProcessAllDataDirectlyWithBatchProcessing() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "processAllDataDirectly", + OperationContext.class, + LoadIndicesArgs.class, + java.util.function.Function.class); + method.setAccessible(true); + + // Mock the updateIndicesService to not throw exceptions + doNothing().when(mockUpdateIndicesService).handleChangeEvents(any(), any()); + + // Setup entity registry mock for dataset entity + EntitySpec mockEntitySpec = mock(EntitySpec.class); + AspectSpec mockAspectSpec = mock(AspectSpec.class); + when(mockEntityRegistry.getEntitySpec("dataset")).thenReturn(mockEntitySpec); + when(mockEntitySpec.getAspectSpec("container")).thenReturn(mockAspectSpec); + when(mockAspectSpec.getDataTemplateClass()) + .thenReturn((Class) com.linkedin.container.Container.class); + + // Create test args with small batch size to ensure batch processing is triggered + LoadIndicesArgs args = new LoadIndicesArgs(); + args.batchSize = 2; // Small batch size to trigger batch processing + args.limit = 5; + args.aspectNames = java.util.List.of("container"); + + // Add test rows with 
valid aspect data to ensure we have data to process + insertTestRowWithValidData( + "urn:li:dataset:(urn:li:dataPlatform:hdfs,TestDataset1,PROD)", + "container", + 0, + Instant.now(), + "urn:li:corpuser:testUser"); + + insertTestRowWithValidData( + "urn:li:dataset:(urn:li:dataPlatform:hdfs,TestDataset2,PROD)", + "container", + 0, + Instant.now(), + "urn:li:corpuser:testUser"); + + // Call the method + Object result = + method.invoke( + loadIndicesStep, + mockOperationContext, + args, + (java.util.function.Function) msg -> null); + + assertNotNull(result); + verify(mockUpdateIndicesService, atLeastOnce()).flush(); + + // Verify that the method completed successfully + LoadIndicesResult loadResult = (LoadIndicesResult) result; + assertNotNull(loadResult); + // The key test is that the method doesn't throw an exception when processing batches + // This tests the !mclBatch.isEmpty() logic path + } + + @Test + public void testProcessAllDataDirectlyWithProgressReporting() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "processAllDataDirectly", + OperationContext.class, + LoadIndicesArgs.class, + java.util.function.Function.class); + method.setAccessible(true); + + // Mock the updateIndicesService to not throw exceptions + doNothing().when(mockUpdateIndicesService).handleChangeEvents(any(), any()); + + // Setup entity registry mock for dataset entity + EntitySpec mockEntitySpec = mock(EntitySpec.class); + AspectSpec mockAspectSpec = mock(AspectSpec.class); + when(mockEntityRegistry.getEntitySpec("dataset")).thenReturn(mockEntitySpec); + when(mockEntitySpec.getAspectSpec("container")).thenReturn(mockAspectSpec); + when(mockAspectSpec.getDataTemplateClass()) + .thenReturn((Class) com.linkedin.container.Container.class); + + // Create test args with small batch size to trigger progress reporting + LoadIndicesArgs args = new LoadIndicesArgs(); + args.batchSize = 2; // Small batch size to trigger batch processing + args.limit = 10; + 
args.aspectNames = java.util.List.of("container"); + + // Add multiple test rows with valid aspect data + for (int i = 0; i < 5; i++) { + insertTestRowWithValidData( + "urn:li:dataset:(urn:li:dataPlatform:hdfs,TestDataset" + i + ",PROD)", + "container", + 0, + Instant.now(), + "urn:li:corpuser:testUser"); + } + + // Track progress messages + java.util.List progressMessages = new java.util.ArrayList<>(); + java.util.function.Function reportFunction = + msg -> { + progressMessages.add(msg); + return null; + }; + + // Call the method + Object result = method.invoke(loadIndicesStep, mockOperationContext, args, reportFunction); + + assertNotNull(result); + verify(mockUpdateIndicesService, atLeastOnce()).flush(); + + // Verify that progress messages were generated + assertTrue(progressMessages.size() > 0); + + // Check that we have "Last URN processed" messages + boolean hasLastUrnMessage = progressMessages.stream().anyMatch(msg -> msg.contains("Last URN")); + assertTrue(hasLastUrnMessage, "Should have 'Last URN' messages"); + } + + @Test + public void testProcessAllDataDirectlyWithLargeDatasetETA() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "processAllDataDirectly", + OperationContext.class, + LoadIndicesArgs.class, + java.util.function.Function.class); + method.setAccessible(true); + + // Mock the updateIndicesService to not throw exceptions + doNothing().when(mockUpdateIndicesService).handleChangeEvents(any(), any()); + + // Create test args that will trigger ETA calculation (>50000 processed) + LoadIndicesArgs args = new LoadIndicesArgs(); + args.batchSize = 1000; // Larger batch size + args.limit = 60000; // Set limit to trigger ETA calculation + args.aspectNames = java.util.List.of("container"); + + // Add many test rows to simulate large dataset + for (int i = 0; i < 100; i++) { + insertTestRow( + "urn:li:dataset:(urn:li:dataPlatform:hdfs,LargeDataset" + i + ",PROD)", + "container", + 0, + Instant.now(), + "testUser"); + } + + 
// Track progress messages + java.util.List progressMessages = new java.util.ArrayList<>(); + java.util.function.Function reportFunction = + msg -> { + progressMessages.add(msg); + return null; + }; + + // Call the method + Object result = method.invoke(loadIndicesStep, mockOperationContext, args, reportFunction); + + assertNotNull(result); + verify(mockUpdateIndicesService, atLeastOnce()).flush(); + + // Verify that progress messages were generated + assertTrue(progressMessages.size() > 0); + + // Check for ETA-related messages (though they may not appear due to test data size) + boolean hasProgressMessage = + progressMessages.stream().anyMatch(msg -> msg.contains("aspects/sec")); + assertTrue(hasProgressMessage, "Should have throughput messages"); + } + + @Test + public void testProcessAllDataDirectlyWithFlushFailure() throws Exception { + var method = + LoadIndicesStep.class.getDeclaredMethod( + "processAllDataDirectly", + OperationContext.class, + LoadIndicesArgs.class, + java.util.function.Function.class); + method.setAccessible(true); + + doNothing().when(mockUpdateIndicesService).handleChangeEvents(any(), any()); + doThrow(new RuntimeException("Flush failed")).when(mockUpdateIndicesService).flush(); + + LoadIndicesArgs args = new LoadIndicesArgs(); + args.batchSize = 100; + args.limit = 10; + args.aspectNames = java.util.List.of("container"); + + // Should not throw exception even if flush fails + Object result = + method.invoke( + loadIndicesStep, + mockOperationContext, + args, + (java.util.function.Function) msg -> null); + + assertNotNull(result); + verify(mockUpdateIndicesService, atLeastOnce()).flush(); + } +} diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesTest.java new file mode 100644 index 000000000000..bc8091a40c08 --- /dev/null +++ 
b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesTest.java @@ -0,0 +1,99 @@ +package com.linkedin.datahub.upgrade.loadindices; + +import static org.mockito.Mockito.*; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; + +import com.linkedin.datahub.upgrade.UpgradeStep; +import com.linkedin.datahub.upgrade.system.elasticsearch.steps.BuildIndicesStep; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.graph.GraphService; +import com.linkedin.metadata.search.EntitySearchService; +import com.linkedin.metadata.service.UpdateIndicesService; +import com.linkedin.metadata.systemmetadata.SystemMetadataService; +import com.linkedin.metadata.timeseries.TimeseriesAspectService; +import io.datahubproject.metadata.context.OperationContext; +import io.ebean.Database; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class LoadIndicesTest { + + @Mock private OperationContext mockOperationContext; + @Mock private Database mockDatabase; + @Mock private EntityService mockEntityService; + @Mock private UpdateIndicesService mockUpdateIndicesService; + @Mock private LoadIndicesIndexManager mockIndexManager; + @Mock private SystemMetadataService mockSystemMetadataService; + @Mock private TimeseriesAspectService mockTimeseriesAspectService; + @Mock private EntitySearchService mockEntitySearchService; + @Mock private GraphService mockGraphService; + @Mock private AspectDao mockAspectDao; + + private LoadIndices loadIndices; + + @BeforeMethod + public void setUp() { + MockitoAnnotations.openMocks(this); + loadIndices = + new LoadIndices( + mockDatabase, + mockEntityService, + mockUpdateIndicesService, + mockIndexManager, + mockSystemMetadataService, + mockTimeseriesAspectService, + 
mockEntitySearchService, + mockGraphService, + mockAspectDao); + } + + @Test + public void testLoadIndicesInit() { + assertNotNull(loadIndices); + assertEquals("LoadIndices", loadIndices.id()); + assertTrue(loadIndices.steps().size() >= 1); + + // With mocked dependencies, the first step should be BuildIndicesStep + // and the last step should be LoadIndicesStep + UpgradeStep firstStep = loadIndices.steps().get(0); + assertTrue(firstStep instanceof BuildIndicesStep); + + UpgradeStep lastStep = loadIndices.steps().get(loadIndices.steps().size() - 1); + assertTrue(lastStep instanceof LoadIndicesStep); + } + + @Test + public void testLoadIndicesStepId() { + // Test the LoadIndicesStep (last step) + UpgradeStep loadIndicesStep = loadIndices.steps().get(loadIndices.steps().size() - 1); + assertEquals("LoadIndicesStep", loadIndicesStep.id()); + assertEquals(0, loadIndicesStep.retryCount()); + + // Also test the first step (BuildIndicesStep if dependencies are provided) + UpgradeStep firstStep = loadIndices.steps().get(0); + assertTrue(firstStep instanceof BuildIndicesStep); + assertEquals(0, firstStep.retryCount()); + } + + @Test + public void testLoadIndicesCleanupSteps() { + assertTrue(loadIndices.cleanupSteps().isEmpty()); + } + + @Test + public void testLoadIndicesWithNullDependencies() { + // Test constructor with null dependencies (graceful degradation) + LoadIndices loadIndicesWithoutDeps = + new LoadIndices(null, null, null, null, null, null, null, null, null); + assertNotNull(loadIndicesWithoutDeps); + assertEquals("LoadIndices", loadIndicesWithoutDeps.id()); + // When server or indexManager is null, should return empty steps list + assertEquals(loadIndicesWithoutDeps.steps().size(), 0); + assertTrue(loadIndicesWithoutDeps.cleanupSteps().isEmpty()); + } +} diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/NoOpKafkaEventProducerTest.java 
b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/NoOpKafkaEventProducerTest.java new file mode 100644 index 000000000000..43f164adbe54 --- /dev/null +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/NoOpKafkaEventProducerTest.java @@ -0,0 +1,241 @@ +package com.linkedin.datahub.upgrade.loadindices; + +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.mxe.DataHubUpgradeHistoryEvent; +import com.linkedin.mxe.MetadataChangeLog; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.PlatformEvent; +import io.datahubproject.metadata.context.OperationContext; +import java.util.Set; +import java.util.concurrent.Future; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class NoOpKafkaEventProducerTest { + + private NoOpKafkaEventProducer producer; + private Urn testUrn; + private AspectSpec mockAspectSpec; + + @BeforeMethod + public void setUp() { + producer = new NoOpKafkaEventProducer(); + testUrn = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)"); + mockAspectSpec = mock(AspectSpec.class); + } + + @Test + public void testConstructor() { + assertNotNull(producer); + } + + @Test + public void testProduceMetadataChangeLog() throws Exception { + MetadataChangeLog mcl = new MetadataChangeLog(); + mcl.setEntityUrn(testUrn); + + Future result = producer.produceMetadataChangeLog(testUrn, mockAspectSpec, mcl); + + assertNotNull(result); + assertTrue(result.isDone()); + assertNull(result.get()); // Should complete immediately with null + } + + @Test + public void testProduceMetadataChangeLogWithNullAspectSpec() throws Exception { + MetadataChangeLog mcl = new MetadataChangeLog(); + mcl.setEntityUrn(testUrn); + + Future result = 
producer.produceMetadataChangeLog(testUrn, null, mcl); + + assertNotNull(result); + assertTrue(result.isDone()); + assertNull(result.get()); + } + + @Test + public void testGetMetadataChangeLogTopicName() { + String topicName = producer.getMetadataChangeLogTopicName(mockAspectSpec); + + assertEquals(topicName, "no-op-mcl-topic"); + } + + @Test + public void testProduceMetadataChangeProposal() throws Exception { + MetadataChangeProposal mcp = new MetadataChangeProposal(); + mcp.setEntityUrn(testUrn); + + Future result = producer.produceMetadataChangeProposal(testUrn, mcp); + + assertNotNull(result); + assertTrue(result.isDone()); + assertNull(result.get()); + } + + @Test + public void testGetMetadataChangeProposalTopicName() { + String topicName = producer.getMetadataChangeProposalTopicName(); + + assertEquals(topicName, "no-op-mcp-topic"); + } + + @Test + public void testProduceFailedMetadataChangeProposalAsync() throws Exception { + MetadataChangeProposal mcp = new MetadataChangeProposal(); + mcp.setEntityUrn(testUrn); + + OperationContext mockOpContext = mock(OperationContext.class); + Set throwables = Set.of(new RuntimeException("Test error")); + + Future result = + producer.produceFailedMetadataChangeProposalAsync(mockOpContext, mcp, throwables); + + assertNotNull(result); + assertTrue(result.isDone()); + assertNull(result.get()); + } + + @Test + public void testProducePlatformEvent() throws Exception { + PlatformEvent platformEvent = new PlatformEvent(); + String eventName = "test-event"; + String key = "test-key"; + + Future result = producer.producePlatformEvent(eventName, key, platformEvent); + + assertNotNull(result); + assertTrue(result.isDone()); + assertNull(result.get()); + } + + @Test + public void testProducePlatformEventWithNullKey() throws Exception { + PlatformEvent platformEvent = new PlatformEvent(); + String eventName = "test-event"; + + Future result = producer.producePlatformEvent(eventName, null, platformEvent); + + assertNotNull(result); + 
assertTrue(result.isDone()); + assertNull(result.get()); + } + + @Test + public void testGetPlatformEventTopicName() { + String topicName = producer.getPlatformEventTopicName(); + + assertEquals(topicName, "no-op-platform-topic"); + } + + @Test + public void testProduceDataHubUpgradeHistoryEvent() { + DataHubUpgradeHistoryEvent event = new DataHubUpgradeHistoryEvent(); + + // Should not throw exception + producer.produceDataHubUpgradeHistoryEvent(event); + } + + @Test + public void testFlush() { + // Should not throw exception + producer.flush(); + } + + @Test + public void testMultipleCalls() { + MetadataChangeLog mcl = new MetadataChangeLog(); + mcl.setEntityUrn(testUrn); + + MetadataChangeProposal mcp = new MetadataChangeProposal(); + mcp.setEntityUrn(testUrn); + + PlatformEvent platformEvent = new PlatformEvent(); + + // Multiple calls should all work + Future result1 = producer.produceMetadataChangeLog(testUrn, mockAspectSpec, mcl); + Future result2 = producer.produceMetadataChangeProposal(testUrn, mcp); + Future result3 = producer.producePlatformEvent("test", "key", platformEvent); + + assertNotNull(result1); + assertNotNull(result2); + assertNotNull(result3); + + assertTrue(result1.isDone()); + assertTrue(result2.isDone()); + assertTrue(result3.isDone()); + } + + @Test + public void testConcurrentCalls() throws Exception { + int numThreads = 10; + Thread[] threads = new Thread[numThreads]; + Future[] results = new Future[numThreads]; + + for (int i = 0; i < numThreads; i++) { + final int threadId = i; + threads[i] = + new Thread( + () -> { + MetadataChangeLog mcl = new MetadataChangeLog(); + mcl.setEntityUrn(testUrn); + results[threadId] = producer.produceMetadataChangeLog(testUrn, mockAspectSpec, mcl); + }); + } + + // Start all threads + for (Thread thread : threads) { + thread.start(); + } + + // Wait for all threads to complete + for (Thread thread : threads) { + thread.join(); + } + + // Verify all results + for (Future result : results) { + 
assertNotNull(result); + assertTrue(result.isDone()); + assertNull(result.get()); + } + } + + @Test + public void testProducerMethods() { + // Test that the internal producer methods work without throwing exceptions + producer.flush(); + + // These methods should not throw exceptions + try { + producer.flush(); + } catch (Exception e) { + fail("flush() should not throw exceptions"); + } + } + + @Test + public void testTopicNameConsistency() { + // Verify topic names are consistent + assertEquals(producer.getMetadataChangeLogTopicName(mockAspectSpec), "no-op-mcl-topic"); + assertEquals(producer.getMetadataChangeProposalTopicName(), "no-op-mcp-topic"); + assertEquals(producer.getPlatformEventTopicName(), "no-op-platform-topic"); + } + + @Test + public void testNullHandling() { + // Test with null inputs + try { + producer.produceMetadataChangeLog(testUrn, null, null); + producer.produceMetadataChangeProposal(testUrn, null); + producer.producePlatformEvent(null, null, null); + producer.produceDataHubUpgradeHistoryEvent(null); + } catch (Exception e) { + fail("Methods should handle null inputs gracefully: " + e.getMessage()); + } + } +} diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/config/LoadIndicesConfigTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/config/LoadIndicesConfigTest.java new file mode 100644 index 000000000000..d306e95b5812 --- /dev/null +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/loadindices/config/LoadIndicesConfigTest.java @@ -0,0 +1,91 @@ +package com.linkedin.datahub.upgrade.loadindices.config; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; + +import com.linkedin.datahub.upgrade.loadindices.LoadIndices; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.utils.elasticsearch.IndexConvention; 
+import com.linkedin.metadata.utils.elasticsearch.SearchClientShim; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.SearchContext; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class LoadIndicesConfigTest { + + @Mock private SearchClientShim mockSearchClient; + @Mock private IndexConvention mockIndexConvention; + @Mock private EntityRegistry mockEntityRegistry; + @Mock private SearchContext mockSearchContext; + @Mock private OperationContext mockOperationContext; + @Mock private ESIndexBuilder mockIndexBuilder; + + private LoadIndicesConfig config; + + @BeforeMethod + public void setUp() { + MockitoAnnotations.openMocks(this); + config = new LoadIndicesConfig(); + + // Setup mock operation context + org.mockito.Mockito.when(mockOperationContext.getSearchContext()).thenReturn(mockSearchContext); + org.mockito.Mockito.when(mockSearchContext.getIndexConvention()) + .thenReturn(mockIndexConvention); + org.mockito.Mockito.when(mockOperationContext.getEntityRegistry()) + .thenReturn(mockEntityRegistry); + } + + @Test + public void testLoadIndicesConfigClass() { + // Test that the LoadIndicesConfig class can be instantiated + assertNotNull(config); + } + + @Test + public void testLoadIndicesClass() { + // Test that the LoadIndices class can be instantiated with null dependencies + LoadIndices loadIndices = new LoadIndices(null, null, null, null, null, null, null, null, null); + assertNotNull(loadIndices); + assertEquals(loadIndices.id(), "LoadIndices"); + assertNotNull(loadIndices.steps()); + } + + @Test + public void testCreateIndexManager() throws Exception { + // Test that createIndexManager method creates LoadIndicesIndexManager successfully + // This test verifies that the method works with proper mocks + var result = + config.createIndexManager(mockOperationContext, mockSearchClient, 
mockIndexBuilder); + assertNotNull(result); + + // Verify that the operation context methods were called + org.mockito.Mockito.verify(mockOperationContext).getSearchContext(); + } + + @Test + public void testCreateIndexManagerWithCustomRefreshInterval() throws Exception { + // Test that createIndexManager method works with custom index builder + var result = + config.createIndexManager(mockOperationContext, mockSearchClient, mockIndexBuilder); + assertNotNull(result); + + // Verify that the operation context methods were called + org.mockito.Mockito.verify(mockOperationContext).getSearchContext(); + } + + @Test + public void testOperationContextIntegration() throws Exception { + // Test that the operation context is properly used in createIndexManager + // This verifies that the method correctly accesses the search context + var result = + config.createIndexManager(mockOperationContext, mockSearchClient, mockIndexBuilder); + assertNotNull(result); + + // Verify that the operation context methods were called + org.mockito.Mockito.verify(mockOperationContext).getSearchContext(); + } +} diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStepTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStepTest.java index ae4a73e219b9..11356e6a72c5 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStepTest.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStepTest.java @@ -25,6 +25,7 @@ import org.mockito.ArgumentCaptor; import org.mockito.Mock; import org.mockito.MockitoAnnotations; +import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -47,9 +48,10 @@ public class SendMAEStepTest { public void setup() { MockitoAnnotations.openMocks(this); - // Create a real H2 in-memory database for testing + // Create a real H2 in-memory database for 
testing with a unique name to avoid conflicts String instanceId = "sendmae_" + UUID.randomUUID().toString().replace("-", ""); - database = EbeanTestUtils.createTestServer(instanceId); + String serverName = "sendmae_test_" + UUID.randomUUID().toString().replace("-", ""); + database = EbeanTestUtils.createNamedTestServer(instanceId, serverName); // Setup the test database with required schema if needed setupTestDatabase(); @@ -72,6 +74,13 @@ public void setup() { .thenReturn(Collections.singletonList(mockResult)); } + @AfterMethod + public void cleanup() { + if (database != null) { + database.shutdown(); + } + } + private void setupTestDatabase() { // Insert a few test rows insertTestRow("urn:li:test:1", "testAspect", 0, Instant.now(), "testUser"); diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/shared/ElasticSearchUpgradeUtilsTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/shared/ElasticSearchUpgradeUtilsTest.java new file mode 100644 index 000000000000..7afcbed3fd2f --- /dev/null +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/shared/ElasticSearchUpgradeUtilsTest.java @@ -0,0 +1,322 @@ +package com.linkedin.datahub.upgrade.shared; + +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + +import com.datahub.util.RecordUtils; +import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.metadata.aspect.EntityAspect; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.graph.GraphService; +import com.linkedin.metadata.search.EntitySearchService; +import com.linkedin.metadata.shared.ElasticSearchIndexed; +import com.linkedin.metadata.systemmetadata.SystemMetadataService; +import com.linkedin.metadata.timeseries.TimeseriesAspectService; +import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; +import java.util.List; +import java.util.Set; +import 
java.util.stream.Stream; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class ElasticSearchUpgradeUtilsTest { + + private GraphService mockGraphService; + private EntitySearchService mockEntitySearchService; + private SystemMetadataService mockSystemMetadataService; + private TimeseriesAspectService mockTimeseriesAspectService; + private AspectDao mockAspectDao; + + @BeforeMethod + public void setUp() { + mockGraphService = mock(GraphService.class); + mockEntitySearchService = mock(EntitySearchService.class); + mockSystemMetadataService = mock(SystemMetadataService.class); + mockTimeseriesAspectService = mock(TimeseriesAspectService.class); + mockAspectDao = mock(AspectDao.class); + } + + @Test + public void testCreateElasticSearchIndexedServicesAllImplement() { + // Create mock services that implement ElasticSearchIndexed + GraphService mockGraphService = + mock(GraphService.class, withSettings().extraInterfaces(ElasticSearchIndexed.class)); + EntitySearchService mockEntitySearchService = + mock(EntitySearchService.class, withSettings().extraInterfaces(ElasticSearchIndexed.class)); + SystemMetadataService mockSystemMetadataService = + mock( + SystemMetadataService.class, + withSettings().extraInterfaces(ElasticSearchIndexed.class)); + TimeseriesAspectService mockTimeseriesAspectService = + mock( + TimeseriesAspectService.class, + withSettings().extraInterfaces(ElasticSearchIndexed.class)); + + List result = + ElasticSearchUpgradeUtils.createElasticSearchIndexedServices( + mockGraphService, + mockEntitySearchService, + mockSystemMetadataService, + mockTimeseriesAspectService); + + assertNotNull(result); + assertEquals(result.size(), 4); + } + + @Test + public void testCreateElasticSearchIndexedServicesNoneImplement() { + // Create mock services that don't implement ElasticSearchIndexed + GraphService mockGraphService = mock(GraphService.class); + EntitySearchService mockEntitySearchService = 
mock(EntitySearchService.class); + SystemMetadataService mockSystemMetadataService = mock(SystemMetadataService.class); + TimeseriesAspectService mockTimeseriesAspectService = mock(TimeseriesAspectService.class); + + List result = + ElasticSearchUpgradeUtils.createElasticSearchIndexedServices( + mockGraphService, + mockEntitySearchService, + mockSystemMetadataService, + mockTimeseriesAspectService); + + assertNotNull(result); + assertTrue(result.isEmpty()); + } + + @Test + public void testCreateElasticSearchIndexedServicesMixed() { + // Create a mix of services - some implement ElasticSearchIndexed, some don't + GraphService mockGraphService = + mock(GraphService.class, withSettings().extraInterfaces(ElasticSearchIndexed.class)); + EntitySearchService mockEntitySearchService = + mock(EntitySearchService.class, withSettings().extraInterfaces(ElasticSearchIndexed.class)); + SystemMetadataService mockSystemMetadataService = mock(SystemMetadataService.class); + TimeseriesAspectService mockTimeseriesAspectService = mock(TimeseriesAspectService.class); + + List result = + ElasticSearchUpgradeUtils.createElasticSearchIndexedServices( + mockGraphService, + mockEntitySearchService, + mockSystemMetadataService, + mockTimeseriesAspectService); + + assertNotNull(result); + assertEquals(result.size(), 2); + } + + @Test + public void testCreateElasticSearchIndexedServicesWithNulls() { + // Test with null services + EntitySearchService mockEntitySearchService = + mock(EntitySearchService.class, withSettings().extraInterfaces(ElasticSearchIndexed.class)); + SystemMetadataService mockSystemMetadataService = + mock( + SystemMetadataService.class, + withSettings().extraInterfaces(ElasticSearchIndexed.class)); + + List result = + ElasticSearchUpgradeUtils.createElasticSearchIndexedServices( + null, mockEntitySearchService, mockSystemMetadataService, null); + + assertNotNull(result); + assertEquals(result.size(), 2); // null services are filtered out + } + + @Test + public void 
testGetActiveStructuredPropertiesDefinitions() { + // Create a fresh mock for this test + AspectDao freshMockAspectDao = mock(AspectDao.class); + + // Mock EntityAspect for active structured property + EntityAspect activeAspect = mock(EntityAspect.class); + Urn activeUrn = UrnUtils.getUrn("urn:li:structuredProperty:active"); + StructuredPropertyDefinition activeDefinition = new StructuredPropertyDefinition(); + activeDefinition.setQualifiedName("active.property"); + + Status activeStatus = new Status(); + activeStatus.setRemoved(false); + + when(activeAspect.getUrn()).thenReturn(activeUrn.toString()); + when(activeAspect.getMetadata()).thenReturn(RecordUtils.toJsonString(activeDefinition)); + + // Mock EntityAspect for removed structured property + EntityAspect removedAspect = mock(EntityAspect.class); + Urn removedUrn = UrnUtils.getUrn("urn:li:structuredProperty:removed"); + StructuredPropertyDefinition removedDefinition = new StructuredPropertyDefinition(); + removedDefinition.setQualifiedName("removed.property"); + + Status removedStatus = new Status(); + removedStatus.setRemoved(true); + + when(removedAspect.getUrn()).thenReturn(removedUrn.toString()); + when(removedAspect.getMetadata()).thenReturn(RecordUtils.toJsonString(removedStatus)); + + // Mock AspectDao to return streams for both calls (removed properties and all properties) + when(freshMockAspectDao.streamAspects(anyString(), anyString())) + .thenReturn(Stream.of(removedAspect)) // First call for removed properties + .thenReturn(Stream.of(activeAspect)); // Second call for all properties (only active) + + Set> result = + ElasticSearchUpgradeUtils.getActiveStructuredPropertiesDefinitions(freshMockAspectDao); + + assertNotNull(result); + assertEquals(result.size(), 1); + + // Verify only active property is returned + Pair activePair = result.iterator().next(); + assertEquals(activePair.getFirst(), activeUrn); + assertEquals(activePair.getSecond().getQualifiedName(), activeDefinition.getQualifiedName()); 
+ } + + @Test + public void testGetActiveStructuredPropertiesDefinitionsEmpty() { + // Create a fresh mock for this test + AspectDao freshMockAspectDao = mock(AspectDao.class); + + // Mock AspectDao to return empty streams for both calls (removed properties and all properties) + when(freshMockAspectDao.streamAspects(anyString(), anyString())) + .thenReturn(Stream.empty()) + .thenReturn(Stream.empty()); + + Set> result = + ElasticSearchUpgradeUtils.getActiveStructuredPropertiesDefinitions(freshMockAspectDao); + + assertNotNull(result); + assertTrue(result.isEmpty()); + } + + @Test + public void testGetActiveStructuredPropertiesDefinitionsAllRemoved() { + // Create a fresh mock for this test + AspectDao freshMockAspectDao = mock(AspectDao.class); + + // Mock EntityAspect for removed structured property + EntityAspect removedAspect = mock(EntityAspect.class); + Urn removedUrn = UrnUtils.getUrn("urn:li:structuredProperty:removed"); + StructuredPropertyDefinition removedDefinition = new StructuredPropertyDefinition(); + removedDefinition.setQualifiedName("removed.property"); + + Status removedStatus = new Status(); + removedStatus.setRemoved(true); + + when(removedAspect.getUrn()).thenReturn(removedUrn.toString()); + when(removedAspect.getMetadata()).thenReturn(RecordUtils.toJsonString(removedStatus)); + + // Mock AspectDao to return streams for both calls (removed properties and all properties) + when(freshMockAspectDao.streamAspects(anyString(), anyString())) + .thenReturn(Stream.of(removedAspect)) // First call for removed properties + .thenReturn(Stream.of(removedAspect)); // Second call for all properties + + Set> result = + ElasticSearchUpgradeUtils.getActiveStructuredPropertiesDefinitions(freshMockAspectDao); + + assertNotNull(result); + assertTrue(result.isEmpty()); + } + + @Test + public void testGetActiveStructuredPropertiesDefinitionsWithNullStatus() { + // Create a fresh mock for this test + AspectDao freshMockAspectDao = mock(AspectDao.class); + + // Mock 
EntityAspect with null status (should be treated as active) + EntityAspect aspectWithNullStatus = mock(EntityAspect.class); + Urn urn = UrnUtils.getUrn("urn:li:structuredProperty:nullstatus"); + StructuredPropertyDefinition definition = new StructuredPropertyDefinition(); + definition.setQualifiedName("nullstatus.property"); + + when(aspectWithNullStatus.getUrn()).thenReturn(urn.toString()); + when(aspectWithNullStatus.getMetadata()).thenReturn(RecordUtils.toJsonString(definition)); + + // Mock AspectDao to return streams for both calls (removed properties and all properties) + when(freshMockAspectDao.streamAspects(anyString(), anyString())) + .thenReturn(Stream.empty()) // First call for removed properties (none) + .thenReturn(Stream.of(aspectWithNullStatus)); // Second call for all properties + + Set> result = + ElasticSearchUpgradeUtils.getActiveStructuredPropertiesDefinitions(freshMockAspectDao); + + assertNotNull(result); + assertEquals(result.size(), 1); + + // Verify property with null status is returned (treated as active) + Pair pair = result.iterator().next(); + assertEquals(pair.getFirst(), urn); + assertEquals(pair.getSecond().getQualifiedName(), definition.getQualifiedName()); + } + + @Test + public void testGetActiveStructuredPropertiesDefinitionsWithException() { + // Create a fresh mock for this test + AspectDao freshMockAspectDao = mock(AspectDao.class); + + // Mock AspectDao to throw exception + when(freshMockAspectDao.streamAspects(anyString(), anyString())) + .thenThrow(new RuntimeException("Database error")); + + assertThrows( + RuntimeException.class, + () -> + ElasticSearchUpgradeUtils.getActiveStructuredPropertiesDefinitions(freshMockAspectDao)); + } + + @Test + public void testCreateElasticSearchIndexedServicesWithActualInstances() { + // Create actual mock instances that implement ElasticSearchIndexed + GraphService graphService = + mock(GraphService.class, withSettings().extraInterfaces(ElasticSearchIndexed.class)); + EntitySearchService 
searchService = + mock(EntitySearchService.class, withSettings().extraInterfaces(ElasticSearchIndexed.class)); + + List result = + ElasticSearchUpgradeUtils.createElasticSearchIndexedServices( + graphService, searchService, mockSystemMetadataService, mockTimeseriesAspectService); + + assertNotNull(result); + assertEquals(result.size(), 2); + assertTrue(result.contains(graphService)); + assertTrue(result.contains(searchService)); + } + + @Test + public void testGetActiveStructuredPropertiesDefinitionsMultipleActive() { + // Create a fresh mock for this test + AspectDao freshMockAspectDao = mock(AspectDao.class); + + // Mock multiple active structured properties + EntityAspect aspect1 = mock(EntityAspect.class); + Urn urn1 = UrnUtils.getUrn("urn:li:structuredProperty:prop1"); + StructuredPropertyDefinition def1 = new StructuredPropertyDefinition(); + def1.setQualifiedName("prop1"); + + EntityAspect aspect2 = mock(EntityAspect.class); + Urn urn2 = UrnUtils.getUrn("urn:li:structuredProperty:prop2"); + StructuredPropertyDefinition def2 = new StructuredPropertyDefinition(); + def2.setQualifiedName("prop2"); + + Status activeStatus = new Status(); + activeStatus.setRemoved(false); + + when(aspect1.getUrn()).thenReturn(urn1.toString()); + when(aspect1.getMetadata()).thenReturn("{}"); // JSON string representation + + when(aspect2.getUrn()).thenReturn(urn2.toString()); + when(aspect2.getMetadata()).thenReturn("{}"); // JSON string representation + + when(freshMockAspectDao.streamAspects(anyString(), anyString())) + .thenReturn(Stream.empty()) // First call for removed properties (none) + .thenReturn(Stream.of(aspect1, aspect2)); // Second call for all properties + + Set> result = + ElasticSearchUpgradeUtils.getActiveStructuredPropertiesDefinitions(freshMockAspectDao); + + assertNotNull(result); + assertEquals(result.size(), 2); + + // Verify both properties are returned + assertTrue(result.stream().anyMatch(pair -> pair.getFirst().equals(urn1))); + 
assertTrue(result.stream().anyMatch(pair -> pair.getFirst().equals(urn2))); + } +} diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/system/cron/TweakReplicasStepTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/system/cron/TweakReplicasStepTest.java index 75d94aa05b0f..7556a666f545 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/system/cron/TweakReplicasStepTest.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/system/cron/TweakReplicasStepTest.java @@ -10,6 +10,7 @@ import com.linkedin.structured.StructuredPropertyDefinition; import com.linkedin.upgrade.DataHubUpgradeState; import com.linkedin.util.Pair; +import io.datahubproject.metadata.context.OperationContext; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -30,6 +31,8 @@ public class TweakReplicasStepTest { @Mock private UpgradeContext mockContext; + @Mock private OperationContext mockOpContext; + @Mock private Urn mockUrn; @Mock private StructuredPropertyDefinition mockPropertyDef; @@ -118,6 +121,7 @@ public void testExecutableSuccess() { parsedArgs.put("dryRun", Optional.of("true")); Mockito.when(mockContext.parsedArgs()).thenReturn(parsedArgs); + Mockito.when(mockContext.opContext()).thenReturn(mockOpContext); UpgradeStepResult result = tweakReplicasStep.executable().apply(mockContext); @@ -135,6 +139,7 @@ public void testExecutableWithException() { Map> parsedArgs = new HashMap<>(); Mockito.when(mockContext.parsedArgs()).thenReturn(parsedArgs); + Mockito.when(mockContext.opContext()).thenReturn(mockOpContext); Mockito.doThrow(new RuntimeException("Test exception")) .when(mockService) .tweakReplicasAll(ArgumentMatchers.any(), ArgumentMatchers.anyBoolean()); @@ -155,6 +160,7 @@ public void testExecutableWithMultipleServices() { Map> parsedArgs = new HashMap<>(); Mockito.when(mockContext.parsedArgs()).thenReturn(parsedArgs); + 
Mockito.when(mockContext.opContext()).thenReturn(mockOpContext); UpgradeStepResult result = tweakReplicasStep.executable().apply(mockContext); diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/ReindexDebugStepTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/ReindexDebugStepTest.java index a0106ad481fb..9902b3897a84 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/ReindexDebugStepTest.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/ReindexDebugStepTest.java @@ -11,6 +11,8 @@ import com.linkedin.structured.StructuredPropertyDefinition; import com.linkedin.upgrade.DataHubUpgradeState; import com.linkedin.util.Pair; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; import java.io.IOException; import java.util.*; import java.util.function.Function; @@ -45,6 +47,7 @@ public class ReindexDebugStepTest { private ReindexDebugStep reindexDebugStep; private List services; private Set> structuredProperties; + private OperationContext opContext = TestOperationContexts.systemContextNoValidate(); @BeforeMethod public void setUp() { @@ -248,6 +251,7 @@ public void testExecutable_Success() throws IOException, IllegalAccessException Map> parsedArgs = new HashMap<>(); parsedArgs.put("index", Optional.of("datahubpolicyindex")); Mockito.when(upgradeContext.parsedArgs()).thenReturn(parsedArgs); + Mockito.when(upgradeContext.opContext()).thenReturn(opContext); List configs = Arrays.asList(reindexConfig1, reindexConfig2); Mockito.when(elasticSearchService.buildReindexConfigs(structuredProperties)) @@ -276,6 +280,7 @@ public void testExecutable_BuildIndexThrowsIOException() Map> parsedArgs = new HashMap<>(); parsedArgs.put("index", Optional.of("datahubpolicyindex")); 
Mockito.when(upgradeContext.parsedArgs()).thenReturn(parsedArgs); + Mockito.when(upgradeContext.opContext()).thenReturn(opContext); List configs = Arrays.asList(reindexConfig1); Mockito.when(elasticSearchService.buildReindexConfigs(structuredProperties)) @@ -303,6 +308,7 @@ public void testExecutable_SetConfigThrowsRuntimeException() Map> parsedArgs = new HashMap<>(); parsedArgs.put("index", Optional.of("datahubpolicyindex")); Mockito.when(upgradeContext.parsedArgs()).thenReturn(parsedArgs); + Mockito.when(upgradeContext.opContext()).thenReturn(opContext); RuntimeException exception = new RuntimeException("Config access denied"); Mockito.when(elasticSearchService.buildReindexConfigs(structuredProperties)) @@ -470,6 +476,7 @@ public void testExecutable_BuildReindexConfigsThrowsIOException() Map> parsedArgs = new HashMap<>(); parsedArgs.put("index", Optional.of("test_index")); Mockito.when(upgradeContext.parsedArgs()).thenReturn(parsedArgs); + Mockito.when(upgradeContext.opContext()).thenReturn(opContext); IOException exception = new IOException("Failed to build configs"); Mockito.when(elasticSearchService.buildReindexConfigs(structuredProperties)) @@ -519,6 +526,7 @@ public void testExecutable_SuccessWithMultipleMatchingConfigs() Map> parsedArgs = new HashMap<>(); parsedArgs.put("index", Optional.of("test")); Mockito.when(upgradeContext.parsedArgs()).thenReturn(parsedArgs); + Mockito.when(upgradeContext.opContext()).thenReturn(opContext); List configs = Arrays.asList(reindexConfig1, reindexConfig2, reindexConfig3); Mockito.when(elasticSearchService.buildReindexConfigs(structuredProperties)) diff --git a/datahub-web-react/src/alchemy-components/components/Editor/toolbar/AddImageButton.tsx b/datahub-web-react/src/alchemy-components/components/Editor/toolbar/AddImageButton.tsx index 011ab9b6d945..3961c89df960 100644 --- a/datahub-web-react/src/alchemy-components/components/Editor/toolbar/AddImageButton.tsx +++ 
b/datahub-web-react/src/alchemy-components/components/Editor/toolbar/AddImageButton.tsx @@ -40,7 +40,14 @@ export const AddImageButton = () => { commandName="insertImage" onClick={handleButtonClick} /> - +
{ }; return ( - + ; +}; + +function createLocation(pathname: string, search: string): Location { + return { pathname, search, state: null, key: 'test' } as unknown as Location; +} + +describe('updateQueryParams', () => { + let history: MockHistory; + + beforeEach(() => { + history = { replace: vi.fn() }; + }); + + const getReplaceArgs = () => (history.replace as any).mock.calls[0][0]; + + it('preserves plus-encoded values (3%2B) from existing params', () => { + const location = createLocation('/path', '?q=3%2B'); + + updateQueryParams({}, location, history as any); + + expect(getReplaceArgs()).toEqual({ + pathname: '/path', + search: 'q=3%2B', + }); + }); + + it('does not convert plus-encoded (3%2B) into space-encoded (3%20) when merging', () => { + const location = createLocation('/search', '?q=3%2B'); + + updateQueryParams({ page: '1' }, location, history as any); + + const args = getReplaceArgs(); + expect(args.pathname).toBe('/search'); + expect(args.search).toContain('q=3%2B'); + expect(args.search).not.toContain('%20'); + expect(args.search).toContain('page=1'); + }); +}); diff --git a/datahub-web-react/src/app/shared/updateQueryParams.ts b/datahub-web-react/src/app/shared/updateQueryParams.ts index 8a10e60a8a0d..9605c2dff27a 100644 --- a/datahub-web-react/src/app/shared/updateQueryParams.ts +++ b/datahub-web-react/src/app/shared/updateQueryParams.ts @@ -5,13 +5,14 @@ type QueryParam = { [key: string]: string | undefined; }; +// Doesn't support the newParams with special characters export default function updateQueryParams(newParams: QueryParam, location: Location, history: History) { - const parsedParams = QueryString.parse(location.search, { arrayFormat: 'comma' }); + const parsedParams = QueryString.parse(location.search, { arrayFormat: 'comma', decode: false }); const updatedParams = { ...parsedParams, ...newParams, }; - const stringifiedParams = QueryString.stringify(updatedParams, { arrayFormat: 'comma' }); + const stringifiedParams = 
QueryString.stringify(updatedParams, { arrayFormat: 'comma', encode: false }); history.replace({ pathname: location.pathname, diff --git a/datahub-web-react/src/images/greatexpectationslogo.png b/datahub-web-react/src/images/greatexpectationslogo.png index 1057c3472f4d..bc7da9cb890e 100644 Binary files a/datahub-web-react/src/images/greatexpectationslogo.png and b/datahub-web-react/src/images/greatexpectationslogo.png differ diff --git a/docker/build.gradle b/docker/build.gradle index 5d4121b7cc6d..c63cffdf2019 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -204,14 +204,92 @@ ext { ] // only for debug variants of quickstart to enable Reload tasks. - // The actual service name needs the profile to be appended, - - // This list only contains modules that can be reloaded via the reloadTask. If other modules need to be reloaded, quickstart* needs to be used. + // The name here is the service name prefix. The suffix doesn't follow a fixed convention across all profiles. + // This list only contains modules that can be reloaded via the reloadTask. Python services support hot reload. + // To re-run setup tasks, quickstart* needs to be used. moduleToContainer = [ ':metadata-service:war': 'datahub-gms', ':datahub-frontend': 'frontend', ':metadata-jobs:mce-consumer-job': 'datahub-mce-consumer', ':metadata-jobs:mae-consumer-job': 'datahub-mae-consumer', ] + // Though these support hot reload, they need to be restarted if any ENVs have been modified and ReloadEnv is run. 
+ moduleToContainerWithHotReload = [ + ':datahub-actions': 'datahub-actions', + ] + + // Helper function to read the captured profile name from file + readCapturedProfile = { + def composeFileName = new File(compose_base).getName() + def profileStatusFileName = composeFileName.replace('.yml', '-profile.txt') + def profileFile = new File(rootProject.buildDir, profileStatusFileName) + + if (!profileFile.exists()) { + return null + } + + def activeProfile = profileFile.text.trim() + logger.lifecycle("Using captured profile: ${activeProfile}") + return activeProfile + } + + // Helper function to find the taskName and config that uses a given profile (only searches debug tasks) + // Returns a map with 'taskName' and 'config' keys + findTaskNameByProfile = { profile -> + def matchingTask = quickstart_configs.find { taskName, config -> + config.isDebug && config.profile == profile + } + + if (matchingTask) { + return [taskName: matchingTask.key, config: matchingTask.value] + } else { + throw new GradleException("No debug quickstart configuration found for profile: ${profile}") + } + } + + // Helper function to get running container service names by matching prefixes from moduleToContainer + // Returns a map of [modulePath: actualServiceName] for only the running containers + // When includeHotReload is true, also includes modules from moduleToContainerWithHotReload + getRunningContainers = { composeFilePath, projectName, includeHotReload = false -> + // Run docker compose ps to get running services + def psCmd = "docker compose -p ${projectName} -f docker/${composeFilePath} ps --services --filter status=running" + def process = psCmd.execute() + process.waitFor() + + if (process.exitValue() != 0) { + throw new GradleException("Failed to list running containers: ${process.err.text}") + } + + def runningServices = process.text.trim().split('\n').collect { it.trim() }.findAll { it } + logger.lifecycle("Running services: ${runningServices}") + + // Match prefixes from 
moduleToContainer to find actual service names + def runningModuleToContainer = [:] + moduleToContainer.each { modulePath, servicePrefix -> + def matched = runningServices.find { service -> service.startsWith(servicePrefix) } + if (matched) { + runningModuleToContainer[modulePath] = matched + logger.lifecycle("Matched module '${modulePath}' (prefix: '${servicePrefix}') to running service '${matched}'") + } else { + logger.info("Module '${modulePath}' (prefix: '${servicePrefix}') is not running, skipping") + } + } + + // Include hot reload modules if requested + if (includeHotReload) { + moduleToContainerWithHotReload.each { modulePath, servicePrefix -> + def matched = runningServices.find { service -> service.startsWith(servicePrefix) } + if (matched) { + runningModuleToContainer[modulePath] = matched + logger.lifecycle("Matched hot-reload module '${modulePath}' (prefix: '${servicePrefix}') to running service '${matched}'") + } else { + logger.info("Hot-reload module '${modulePath}' (prefix: '${servicePrefix}') is not running, skipping") + } + } + } + + return runningModuleToContainer + } } // Register all quickstart tasks @@ -422,6 +500,16 @@ quickstart_configs.each { taskName, config -> doFirst { // Set removeVolumes for this specific configuration dockerCompose."${taskName}".removeVolumes = !config.preserveVolumes + // Delete the captured profile file after compose down + def composeFileName = new File(compose_base).getName() + def profileStatusFileName = composeFileName.replace('.yml', '-profile.txt') + def profileFile = new File(rootProject.buildDir, profileStatusFileName) + + if (profileFile.exists()) { + profileFile.delete() + logger.lifecycle("Deleted profile file: ${profileFile.absolutePath}") + } + } finalizedBy(tasks.matching { task -> task.name == "${taskName}ComposeDownForced" @@ -432,75 +520,174 @@ quickstart_configs.each { taskName, config -> tasks.register('quickstartDown') { group = 'quickstart' finalizedBy(tasks.withType(ComposeDownForced)) + 
doLast { + // Delete the captured profile file after compose down + def composeFileName = new File(compose_base).getName() + def profileStatusFileName = composeFileName.replace('.yml', '-profile.txt') + def profileFile = new File(rootProject.buildDir, profileStatusFileName) + + if (profileFile.exists()) { + profileFile.delete() + logger.lifecycle("Deleted profile file: ${profileFile.absolutePath}") + } + } } tasks.withType(ComposeUp).configureEach { shouldRunAfter('quickstartNuke') dependsOn tasks.named("minDockerCompose2.20") + + // Capture profile information to a file based on compose file name when ComposeUp runs + doFirst { + // Only capture profile if the task is being directly executed (not just accessed/configured) + // Extract task name to find corresponding config (e.g., "quickstartDebugComposeUp" -> "quickstartDebug") + def taskBaseName = name.replaceAll('ComposeUp$', '') + def config = quickstart_configs[taskBaseName] + + if (config?.profile) { + // Get the compose file name and derive the profile tracking filename + def composeFileName = new File(compose_base).getName() + def profileStatusFileName = composeFileName.replace('.yml', '-profile.txt') + def profileFile = new File(rootProject.buildDir, profileStatusFileName) + + // Ensure build directory exists + profileFile.getParentFile().mkdirs() + + // Write the profile name + profileFile.text = config.profile + + logger.lifecycle("Captured profile '${config.profile}' to ${profileFile.absolutePath}") + } + } } -// Register all quickstart Reload tasks. For quickstartDebug, the reload task is DebugReload. 
(Taskname without quickstart prefix) -quickstart_configs.each { taskName, config -> - if (config.isDebug) { - def reloadTaskName = taskName.replaceFirst(/^quickstart/, "") - tasks.register("${reloadTaskName}Reload", Exec) { - dependsOn tasks.named("prepareAll${taskName}") - group = 'quickstart' - description = "Build and reload only changed containers for the ${taskName} task" - doFirst { - def executedTasks = project.gradle.taskGraph.allTasks.findAll { it.state.executed } - def containersToRestart = [] - - moduleToContainer.each { modulePath, containerName -> - def moduleProject = project.project(modulePath) - def dockerPrepareTask = moduleProject.tasks.findByName('dockerPrepare') - - if (dockerPrepareTask && executedTasks.contains(dockerPrepareTask) && !dockerPrepareTask.state.upToDate) { - containersToRestart << "${containerName}-${config.profile}" - } - } +tasks.register("reload", Exec) { + group = 'quickstart' + description = "Build and reload only changed containers for the active profile" + // Read the captured profile name from file + def activeProfile = readCapturedProfile.call() + def matchingTask = null + def matchingTaskName = null + def matchingConfig = null + + if (activeProfile) { + // Find the task and config that matches this profile + matchingTask = findTaskNameByProfile.call(activeProfile) + matchingTaskName = matchingTask.taskName + matchingConfig = matchingTask.config + + // Dynamically depend on the correct prepareAll task + dependsOn tasks.named("prepareAll${matchingTaskName}") + } - // Only restart containers that had their modules rebuilt - if (containersToRestart) { - def composeFiles = dockerCompose."${taskName}".useComposeFiles.get() - def composeFileArgs = composeFiles.collectMany { ['-f', it] } + doFirst { + if (!activeProfile){ + throw new GradleException("Could not detect running profile. 
reload[Env] is supported only when one of the :docker:quickstartDebug* task is running.") + } + logger.lifecycle("Active profile '${activeProfile}' maps to task: ${matchingTaskName}") - // Use the actual project name from config, fallback to default - def actualProjectName = config.additionalConfig?.projectName ?: project_name + // Get running containers by matching prefixes (exclude hot reload modules) + def actualProjectName = matchingConfig.additionalConfig?.projectName ?: project_name + def runningModuleToContainer = getRunningContainers.call(compose_base, actualProjectName, false) - def cmd = ["docker compose -p ${actualProjectName} --profile ${config.profile}"] + composeFileArgs + ['restart'] + containersToRestart - println(cmd.join(" ")) - commandLine 'bash', '-c', cmd.join(" ") - } else { - // If no containers need restart, make this a no-op - commandLine 'bash', '-c', 'echo "No containers need restarting - all modules are up to date"' - } + def executedTasks = project.gradle.taskGraph.allTasks.findAll { it.state.executed } + def containersToRestart = [] + + runningModuleToContainer.each { modulePath, actualServiceName -> + def moduleProject = project.project(modulePath) + def dockerPrepareTask = moduleProject.tasks.findByName('dockerPrepare') + + if (dockerPrepareTask && executedTasks.contains(dockerPrepareTask) && !dockerPrepareTask.state.upToDate) { + containersToRestart << actualServiceName } } - tasks.register("${reloadTaskName}ReloadEnv", Exec) { - dependsOn tasks.named("prepareAll${taskName}") - group = 'quickstart' - description = "Build changed containers but recreate all services for the ${taskName} task" - doFirst { - def containersToRestart = [] - moduleToContainer.each { modulePath, containerName -> - // Find which of of the reloadable modules are in used in this task - if (config.modules.contains(modulePath)) { - containersToRestart << "${containerName}-${config.profile}" - } - } - def composeFiles = 
dockerCompose."${taskName}".useComposeFiles.get() - def composeFileArgs = composeFiles.collectMany { ['-f', it] } - // Use the actual project name from config, fallback to default - def actualProjectName = config.additionalConfig?.projectName ?: project_name + // Only restart containers that had their modules rebuilt + if (containersToRestart) { + def cmd = ["docker compose -p ${actualProjectName} --profile ${activeProfile} -f ${compose_base} restart"] + containersToRestart + println(cmd.join(" ")) + commandLine 'bash', '-c', cmd.join(" ") + } else { + // If no containers need restart, make this a no-op + commandLine 'bash', '-c', 'echo "No containers need restarting - all modules are up to date"' + } + } +} +tasks.register("reloadEnv", Exec) { + group = 'quickstart' + description = "Build changed containers but recreate all services for the active profile" + // Read the captured profile name from file + def activeProfile = readCapturedProfile.call() + def matchingTask = null + def matchingTaskName = null + def matchingConfig = null + + if (activeProfile) { + // Find the task and config that matches this profile + matchingTask = findTaskNameByProfile.call(activeProfile) + matchingTaskName = matchingTask.taskName + matchingConfig = matchingTask.config + // Dynamically depend on the correct prepareAll task + dependsOn tasks.named("prepareAll${matchingTaskName}") + } + + doFirst { + if (!activeProfile){ + throw new GradleException("Could not detect running profile. 
reload[Env] is supported only when one of the :docker:quickstartDebug* task is running.") + } + logger.lifecycle("Active profile '${activeProfile}' maps to task: ${matchingTaskName}") + + // Get running containers by matching prefixes (include hot reload modules) + def actualProjectName = matchingConfig.additionalConfig?.projectName ?: project_name + def runningModuleToContainer = getRunningContainers.call(compose_base, actualProjectName, true) - def cmd = ["docker compose -p ${actualProjectName} --profile ${config.profile}"] + composeFileArgs + ['up', '-d', '--no-deps'] + containersToRestart - println(cmd.join(" ")) - commandLine 'bash', '-c', cmd.join(" ") + def containersToRestart = [] + runningModuleToContainer.each { modulePath, actualServiceName -> + // Find which of the reloadable modules are used in this task + if (matchingConfig.modules.contains(modulePath)) { + containersToRestart << actualServiceName + } + } + // Specify all environment variables specified during quickstart* for reloadEnv as well since containers are re-created. + environment.put 'DATAHUB_VERSION', System.getenv("DATAHUB_VERSION") ?: "v${version}" + environment.put 'DATAHUB_APP_VERSION', System.getenv("DATAHUB_APP_VERSION") ?: "v${version}" + environment.put 'DATAHUB_TELEMETRY_ENABLED', 'false' + environment.put "METADATA_TESTS_ENABLED", "true" + environment.put "DATAHUB_REPO", "${docker_registry}" + + // Additional environment variables if specified + if (matchingConfig.additionalEnv) { + matchingConfig.additionalEnv.each { key, value -> + environment.put key, value } } + + def composeFiles = dockerCompose."${matchingTaskName}".useComposeFiles.get() + def composeFileArgs = composeFiles.collectMany { ['-f', it] } + + def cmd = ["docker compose -p ${actualProjectName} --profile ${activeProfile} -f ${compose_base} up -d --no-deps"] + containersToRestart + println(cmd.join(" ")) + commandLine 'bash', '-c', cmd.join(" ") + } +} + +// Redirects from existing tasks as for a transition period. 
To be deprecated +quickstart_configs.each { taskName, config -> + if (config.isDebug) { + def reloadTaskName = taskName.replaceFirst(/^quickstart/, "") + tasks.register("${reloadTaskName}Reload") { + dependsOn tasks.named("reload") + doLast{ + logger.lifecycle("⚠️Deprecated: Just run 'reload' instead of '${reloadTaskName}Reload' task - it will auto-detect the active profile and restart the modified services") + } + } + tasks.register("${reloadTaskName}ReloadEnv") { + dependsOn tasks.named("reloadEnv") + doLast{ + logger.lifecycle("⚠️Deprecated: Just run 'reloadEnv' instead of '${reloadTaskName}ReloadEnv' task - it will auto-detect the active profile and recreate the modified services") + } } + } } // :docker:build builds all docker images. diff --git a/docker/datahub-actions/Dockerfile b/docker/datahub-actions/Dockerfile index b9d90262dff4..43d867c3ed5e 100644 --- a/docker/datahub-actions/Dockerfile +++ b/docker/datahub-actions/Dockerfile @@ -126,10 +126,51 @@ RUN --mount=type=bind,source=./docker/snippets/oracle_instantclient.sh,target=/o USER datahub # INLINE-END +# ============================================================================= +# PRE-BUILD BUNDLED INGESTION VENVS +# ============================================================================= + +FROM ingestion-base-slim AS bundled-vEnvs +USER 0 + +# Set up bundled venv configuration +ARG BUNDLED_VENV_PLUGINS="s3,demo-data" +ARG BUNDLED_CLI_VERSION +ENV DATAHUB_BUNDLED_VENV_PATH=/opt/datahub/venvs +ENV BUNDLED_VENV_PLUGINS=${BUNDLED_VENV_PLUGINS} +ENV BUNDLED_CLI_VERSION=${BUNDLED_CLI_VERSION} + +# Create venv directory +RUN mkdir -p $DATAHUB_BUNDLED_VENV_PATH && \ + chown -R datahub:datahub $DATAHUB_BUNDLED_VENV_PATH + +# Copy the self-contained venv build scripts +COPY --chown=datahub:datahub ./docker/snippets/ingestion/build_bundled_venvs_unified.py /tmp/ +COPY --chown=datahub:datahub ./docker/snippets/ingestion/build_bundled_venvs_unified.sh /tmp/ + +# Make scripts executable +RUN chmod +x 
/tmp/build_bundled_venvs_unified.sh && \ + chmod +x /tmp/build_bundled_venvs_unified.py + +USER datahub + +# Build bundled venvs using our self-contained script +WORKDIR /tmp +RUN ./build_bundled_venvs_unified.sh + +USER datahub + +# ============================================================================= +# END BUNDLED VENVS SECTION +# ============================================================================= + FROM ingestion-base-${APP_ENV} AS final USER root +ENV DATAHUB_BUNDLED_VENV_PATH=/opt/datahub/venvs +COPY --from=bundled-vEnvs $DATAHUB_BUNDLED_VENV_PATH $DATAHUB_BUNDLED_VENV_PATH + COPY --from=powerman/dockerize:0.24 /usr/local/bin/dockerize /usr/local/bin COPY --chown=datahub:datahub ./docker/datahub-actions/start.sh /start_datahub_actions.sh COPY --chown=datahub:datahub ./docker/datahub-actions/readiness-check.sh /readiness-check.sh @@ -170,46 +211,5 @@ RUN --mount=type=bind,source=./python-build/version_updater.py,target=/version_u # really impact image size. RUN uv pip install -e '/metadata-ingestion/' -e '/datahub-actions/[all]' -# ============================================================================= -# PRE-BUILD BUNDLED INGESTION VENVS -# ============================================================================= - -USER 0 - -# Set up bundled venv configuration -ARG BUNDLED_VENV_PLUGINS="s3,demo-data" -ARG BUNDLED_CLI_VERSION -ENV DATAHUB_BUNDLED_VENV_PATH=/opt/datahub/venvs -ENV BUNDLED_VENV_PLUGINS=${BUNDLED_VENV_PLUGINS} -ENV BUNDLED_CLI_VERSION=${BUNDLED_CLI_VERSION} - -# Create venv directory -RUN mkdir -p $DATAHUB_BUNDLED_VENV_PATH && \ - chown -R datahub:datahub $DATAHUB_BUNDLED_VENV_PATH - -# Copy the self-contained venv build scripts -COPY --chown=datahub:datahub ./docker/snippets/ingestion/build_bundled_venvs_unified.py /tmp/ -COPY --chown=datahub:datahub ./docker/snippets/ingestion/build_bundled_venvs_unified.sh /tmp/ - -# Make scripts executable -RUN chmod +x /tmp/build_bundled_venvs_unified.sh && \ - chmod +x 
/tmp/build_bundled_venvs_unified.py - -USER datahub - -# Build bundled venvs using our self-contained script -WORKDIR /tmp -RUN ./build_bundled_venvs_unified.sh - -# Remove build scripts to reduce image size -USER 0 -RUN rm -rf /tmp/build_bundled_venvs_unified.py /tmp/build_bundled_venvs_unified.sh - -USER datahub - -# ============================================================================= -# END BUNDLED VENVS SECTION -# ============================================================================= - ENTRYPOINT [ ] CMD dockerize -wait ${DATAHUB_GMS_PROTOCOL:-http}://$DATAHUB_GMS_HOST:$DATAHUB_GMS_PORT/health -timeout 240s /start_datahub_actions.sh diff --git a/docker/datahub-frontend/start.sh b/docker/datahub-frontend/start.sh index a1cc436a0c19..85f1777c7c36 100755 --- a/docker/datahub-frontend/start.sh +++ b/docker/datahub-frontend/start.sh @@ -49,8 +49,6 @@ export JAVA_OPTS="${JAVA_MEMORY_OPTS:-"-Xms512m -Xmx1024m"} \ -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf \ -Dlogback.configurationFile=datahub-frontend/conf/logback.xml \ -Dlogback.debug=false \ - --add-opens java.base/java.lang=ALL-UNNAMED \ - --add-opens=java.base/java.util=ALL-UNNAMED \ ${PROMETHEUS_AGENT:-} ${OTEL_AGENT:-} \ ${TRUSTSTORE_FILE:-} ${TRUSTSTORE_TYPE:-} ${TRUSTSTORE_PASSWORD:-} \ ${HTTP_PROXY:-} ${HTTPS_PROXY:-} ${NO_PROXY:-} \ diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 1b2bc175135b..740b609a13f6 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -1020,6 +1020,7 @@ module.exports = { }, "docs/how/backup-datahub", "docs/how/restore-indices", + "docs/how/load-indices", "docs/advanced/db-retention", "docs/advanced/monitoring", "docs/deploy/telemetry", diff --git a/docs-website/yarn.lock b/docs-website/yarn.lock index 1f127d6701d1..c0ac2729adeb 100644 --- a/docs-website/yarn.lock +++ b/docs-website/yarn.lock @@ -10464,9 +10464,9 @@ tapable@^2.0.0, tapable@^2.1.1, tapable@^2.2.0, tapable@^2.2.1: integrity 
sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ== tar-fs@^2.0.0, tar-fs@^2.1.1: - version "2.1.3" - resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-2.1.3.tgz#fb3b8843a26b6f13a08e606f7922875eb1fbbf92" - integrity sha512-090nwYJDmlhwFwEW3QQl+vaNnxsO2yVsd45eTKRBzSzu+hlb1w2K9inVq5b0ngXuLVqQ4ApvsUHHnu/zQNkWAg== + version "2.1.4" + resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-2.1.4.tgz#800824dbf4ef06ded9afea4acafe71c67c76b930" + integrity sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ== dependencies: chownr "^1.1.1" mkdirp-classic "^0.5.2" diff --git a/docs/deploy/environment-vars.md b/docs/deploy/environment-vars.md index 4cfc008ec249..75b6de3243bc 100644 --- a/docs/deploy/environment-vars.md +++ b/docs/deploy/environment-vars.md @@ -245,6 +245,7 @@ Reference Links: | `AWS_REGION` | `null` | AWS region | GMS, MAE Consumer, MCE Consumer, System Update | | `ELASTICSEARCH_IMPLEMENTATION` | `elasticsearch` | Implementation (elasticsearch or opensearch) | GMS, MAE Consumer, MCE Consumer, System Update | | `ELASTIC_ID_HASH_ALGO` | `MD5` | ID hash algorithm | GMS, MAE Consumer, MCE Consumer, System Update | +| `ELASTICSEARCH_DATA_NODE_COUNT` | `1` | Number of Elasticsearch data nodes | GMS, MAE Consumer, MCE Consumer, System Update | #### SSL Context Configuration @@ -288,27 +289,27 @@ Reference Links: #### Build Indices Configuration -| Environment Variable | Default | Description | Components | -| ---------------------------------------------------------- | ------- | ----------------------------------------------------------- | ------------- | -| `ELASTICSEARCH_BUILD_INDICES_ALLOW_DOC_COUNT_MISMATCH` | `false` | Allow document count mismatch when clone indices is enabled | System Update | -| `ELASTICSEARCH_BUILD_INDICES_CLONE_INDICES` | `true` | Clone indices | System Update | -| `ELASTICSEARCH_BUILD_INDICES_RETENTION_UNIT` | `DAYS` | Retention unit for indices | 
System Update | -| `ELASTICSEARCH_BUILD_INDICES_RETENTION_VALUE` | `60` | Retention value for indices | System Update | -| `ELASTICSEARCH_BUILD_INDICES_REINDEX_OPTIMIZATION_ENABLED` | `true` | Enable reindex optimization | System Update | -| `ELASTICSEARCH_NUM_SHARDS_PER_INDEX` | `1` | Number of shards per index | System Update | -| `ELASTICSEARCH_NUM_REPLICAS_PER_INDEX` | `1` | Number of replicas per index | System Update | -| `ELASTICSEARCH_INDEX_BUILDER_NUM_RETRIES` | `3` | Index builder number of retries | System Update | -| `ELASTICSEARCH_INDEX_BUILDER_REFRESH_INTERVAL_SECONDS` | `3` | Index builder refresh interval | System Update | -| `SEARCH_DOCUMENT_MAX_ARRAY_LENGTH` | `1000` | Maximum array length in search documents | System Update | -| `SEARCH_DOCUMENT_MAX_OBJECT_KEYS` | `1000` | Maximum object keys in search documents | System Update | -| `SEARCH_DOCUMENT_MAX_VALUE_LENGTH` | `4096` | Maximum value length in search documents | System Update | -| `ELASTICSEARCH_MAIN_TOKENIZER` | `null` | Main tokenizer | System Update | -| `ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX` | `false` | Enable mappings reindex | System Update | -| `ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX` | `false` | Enable settings reindex | System Update | -| `ELASTICSEARCH_INDEX_BUILDER_MAX_REINDEX_HOURS` | `0` | Maximum reindex hours (0 = no timeout) | System Update | -| `ELASTICSEARCH_INDEX_BUILDER_SETTINGS_OVERRIDES` | `null` | Index builder settings overrides | System Update | -| `ELASTICSEARCH_MIN_SEARCH_FILTER_LENGTH` | `3` | Minimum search filter length | System Update | -| `ELASTICSEARCH_INDEX_BUILDER_ENTITY_SETTINGS_OVERRIDES` | `null` | Entity settings overrides | System Update | +| Environment Variable | Default | Description | Components | +| ---------------------------------------------------------- | -------------------------------- | ----------------------------------------------------------- | ------------- | +| `ELASTICSEARCH_BUILD_INDICES_ALLOW_DOC_COUNT_MISMATCH` | 
`false` | Allow document count mismatch when clone indices is enabled | System Update | +| `ELASTICSEARCH_BUILD_INDICES_CLONE_INDICES` | `true` | Clone indices | System Update | +| `ELASTICSEARCH_BUILD_INDICES_RETENTION_UNIT` | `DAYS` | Retention unit for indices | System Update | +| `ELASTICSEARCH_BUILD_INDICES_RETENTION_VALUE` | `60` | Retention value for indices | System Update | +| `ELASTICSEARCH_BUILD_INDICES_REINDEX_OPTIMIZATION_ENABLED` | `true` | Enable reindex optimization | System Update | +| `ELASTICSEARCH_NUM_SHARDS_PER_INDEX` | `${elasticsearch.dataNodeCount}` | Number of shards per index, defaults to dataNodeCount | System Update | +| `ELASTICSEARCH_NUM_REPLICAS_PER_INDEX` | `1` | Number of replicas per index | System Update | +| `ELASTICSEARCH_INDEX_BUILDER_NUM_RETRIES` | `3` | Index builder number of retries | System Update | +| `ELASTICSEARCH_INDEX_BUILDER_REFRESH_INTERVAL_SECONDS` | `3` | Index builder refresh interval | System Update | +| `SEARCH_DOCUMENT_MAX_ARRAY_LENGTH` | `1000` | Maximum array length in search documents | System Update | +| `SEARCH_DOCUMENT_MAX_OBJECT_KEYS` | `1000` | Maximum object keys in search documents | System Update | +| `SEARCH_DOCUMENT_MAX_VALUE_LENGTH` | `4096` | Maximum value length in search documents | System Update | +| `ELASTICSEARCH_MAIN_TOKENIZER` | `null` | Main tokenizer | System Update | +| `ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX` | `false` | Enable mappings reindex | System Update | +| `ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX` | `false` | Enable settings reindex | System Update | +| `ELASTICSEARCH_INDEX_BUILDER_MAX_REINDEX_HOURS` | `0` | Maximum reindex hours (0 = no timeout) | System Update | +| `ELASTICSEARCH_INDEX_BUILDER_SETTINGS_OVERRIDES` | `null` | Index builder settings overrides | System Update | +| `ELASTICSEARCH_MIN_SEARCH_FILTER_LENGTH` | `3` | Minimum search filter length | System Update | +| `ELASTICSEARCH_INDEX_BUILDER_ENTITY_SETTINGS_OVERRIDES` | `null` | Entity settings 
overrides | System Update | #### Search Configuration @@ -333,21 +334,21 @@ Reference Links: #### Graph Search Configuration -| Environment Variable | Default | Description | Components | -| ----------------------------------------------------------- | ------- | ------------------------------------------------------------------------------- | ---------- | -| `ELASTICSEARCH_SEARCH_GRAPH_TIMEOUT_SECONDS` | `50` | Graph DAO timeout seconds | GMS | -| `ELASTICSEARCH_SEARCH_GRAPH_BATCH_SIZE` | `1000` | Graph DAO batch size | GMS | -| `ELASTICSEARCH_SEARCH_GRAPH_MULTI_PATH_SEARCH` | `false` | Allow path retraversal for all paths | GMS | -| `ELASTICSEARCH_SEARCH_GRAPH_BOOST_VIA_NODES` | `true` | Boost graph edges with via nodes | GMS | -| `ELASTICSEARCH_SEARCH_GRAPH_STATUS_ENABLED` | `false` | Enable soft delete tracking of URNs on edges | GMS | -| `ELASTICSEARCH_SEARCH_GRAPH_LINEAGE_MAX_HOPS` | `20` | Maximum hops to traverse lineage graph | GMS | -| `ELASTICSEARCH_SEARCH_GRAPH_IMPACT_MAX_HOPS` | `1000` | Maximum hops to traverse for impact analysis (impact.maxHops) | GMS | -| `ELASTICSEARCH_SEARCH_GRAPH_IMPACT_MAX_RELATIONS` | `40000` | Maximum number of relationships for impact analysis (impact.maxRelations) | GMS | -| `ELASTICSEARCH_SEARCH_GRAPH_IMPACT_SLICES` | `2` | Number of slices for parallel search operations (impact.slices) | GMS | -| `ELASTICSEARCH_SEARCH_GRAPH_IMPACT_KEEP_ALIVE` | `5m` | Point-in-Time keepAlive duration for impact analysis queries (impact.keepAlive) | GMS | -| `ELASTICSEARCH_SEARCH_GRAPH_IMPACT_MAX_THREADS` | `32` | Maximum parallel lineage graph queries | GMS | -| `ELASTICSEARCH_SEARCH_GRAPH_QUERY_OPTIMIZATION` | `true` | Reduce query nesting if possible | GMS | -| `ELASTICSEARCH_SEARCH_GRAPH_POINT_IN_TIME_CREATION_ENABLED` | `true` | Enable creation of point in time snapshots for graph queries | GMS | +| Environment Variable | Default | Description | Components | +| ----------------------------------------------------------- | 
-------------------------------- | ----------------------------------------------------------------------------------------------------- | ---------- | +| `ELASTICSEARCH_SEARCH_GRAPH_TIMEOUT_SECONDS` | `50` | Graph DAO timeout seconds | GMS | +| `ELASTICSEARCH_SEARCH_GRAPH_BATCH_SIZE` | `1000` | Graph DAO batch size | GMS | +| `ELASTICSEARCH_SEARCH_GRAPH_MULTI_PATH_SEARCH` | `false` | Allow path retraversal for all paths | GMS | +| `ELASTICSEARCH_SEARCH_GRAPH_BOOST_VIA_NODES` | `true` | Boost graph edges with via nodes | GMS | +| `ELASTICSEARCH_SEARCH_GRAPH_STATUS_ENABLED` | `false` | Enable soft delete tracking of URNs on edges | GMS | +| `ELASTICSEARCH_SEARCH_GRAPH_LINEAGE_MAX_HOPS` | `20` | Maximum hops to traverse lineage graph | GMS | +| `ELASTICSEARCH_SEARCH_GRAPH_IMPACT_MAX_HOPS` | `1000` | Maximum hops to traverse for impact analysis (impact.maxHops) | GMS | +| `ELASTICSEARCH_SEARCH_GRAPH_IMPACT_MAX_RELATIONS` | `40000` | Maximum number of relationships for impact analysis (impact.maxRelations) | GMS | +| `ELASTICSEARCH_SEARCH_GRAPH_IMPACT_SLICES` | `${elasticsearch.dataNodeCount}` | Number of slices for parallel search operations (impact.slices), defaults to dataNodeCount, minimum 2 | GMS | +| `ELASTICSEARCH_SEARCH_GRAPH_IMPACT_KEEP_ALIVE` | `5m` | Point-in-Time keepAlive duration for impact analysis queries (impact.keepAlive) | GMS | +| `ELASTICSEARCH_SEARCH_GRAPH_IMPACT_MAX_THREADS` | `32` | Maximum parallel lineage graph queries | GMS | +| `ELASTICSEARCH_SEARCH_GRAPH_QUERY_OPTIMIZATION` | `true` | Reduce query nesting if possible | GMS | +| `ELASTICSEARCH_SEARCH_GRAPH_POINT_IN_TIME_CREATION_ENABLED` | `true` | Enable creation of point in time snapshots for graph queries | GMS | ### Neo4j Configuration diff --git a/docs/docker/development.md b/docs/docker/development.md index 5571e18ea9a9..d17abdd01094 100644 --- a/docs/docker/development.md +++ b/docs/docker/development.md @@ -50,18 +50,15 @@ To see these changes in the deployment, a rebuilt of modified 
artifacts and a re The restart can be performed using following gradle task. ```shell -./gradlew :docker:debugReload +./gradlew :docker:reload ``` This single task will build the artifacts that were modified and restart only those containers that were affected by the rebuilt artifacts. -For each of the `quickstartDebug` variants, there is a corresponding `debugReload` task. -For `quickstartDebugConsumers`, the reload task is `debugConsumersReload` +`reload` is generally much faster than re-running `quickstartDebug` and is recommended after an initial bringup of all services via `quickstartDebug` followed +by loading the incremental changes using `reload`. -`debugReload` is generally much faster than re-running `quickstartDebug` and is recommended after an initial bringup of all services via `quickstartDebug` followed -by loading the incremental changes using `debugReload`. - -If there are significant changes to the code, for example due to pulling the latest code, it is recommended to start with a `quickstartDebug` and then iterate using `debugReload` +If there are significant changes to the code, for example due to pulling the latest code, it is recommended to start with a `quickstartDebug` and then iterate using `reload` # Setting environment variables via env files @@ -72,12 +69,12 @@ To use the env file, run DATAHUB_LOCAL_COMMON_ENV=my-settings.env ./gradlew quickstartDebug ``` -The `debugReload` process continues to work, but the restarted containers will use the same settings that were present at the time of running `./gradlew quickstartDebug`. +The `reload` process continues to work, but the restarted containers will use the same settings that were present at the time of running `./gradlew quickstartDebug`. 
-If you need to reload the containers with a different env file or changes made to the env file, a task `debugReloadEnv` builds the artifacts that have code changes +If you need to reload the containers with a different env file or changes made to the env file, a task `reloadEnv` builds the artifacts that have code changes and recreates all the containers that refer to these the env file via the DATAHUB_LOCAL_COMMON_ENV environment variable. -`debugReloadEnv` also has variants for all the `quickstartDebug` variants. For example, `quickstartDebugConsumers` has `debugConsumersReloadEnv` +The `reload` and `reloadEnv` tasks can only be run after running one of the debug variants of a quickstart task like `quickstartDebug` ## Start/Stop diff --git a/docs/how/load-indices.md b/docs/how/load-indices.md new file mode 100644 index 000000000000..1275c32e754b --- /dev/null +++ b/docs/how/load-indices.md @@ -0,0 +1,482 @@ +# Load Indices: High-Performance Bulk Index Loading + +LoadIndices is a high-performance upgrade task designed for bulk loading metadata aspects directly from the database into Elasticsearch/OpenSearch indices. Unlike RestoreIndices which focuses on correctness and consistency, LoadIndices is optimized for speed and throughput during initial deployments or large-scale data migrations. + +## Overview + +LoadIndices bypasses the standard event-driven processing pipeline to directly stream data from the `metadata_aspect_v2` table into search indices using optimized bulk operations. This approach provides significant performance improvements for large installations while making specific architectural trade-offs that prioritize speed over consistency. + +🚨 **CRITICAL WARNING**: LoadIndices is designed for specific use cases only and should **NEVER** be used in production environments with active concurrent writes, MCL-dependent systems, or real-time consistency requirements.
See [Performance Trade-offs & Implications](#performance-trade-offs--implications) for complete details. + +### Key Features + +- **🚀 High Performance**: Direct streaming from database with optimized bulk operations +- **⚡ Fast Bulk Loading**: Optimized for speed over consistency during initial loads +- **🔧 Refresh Management**: Automatically disables refresh intervals during loading for optimal performance +- **📊 Comprehensive Monitoring**: Real-time progress reporting and performance metrics +- **⚙️ Configurable Isolation**: Utilizes READ_UNCOMMITTED transactions for faster scanning + +--- + +## Performance Trade-offs & Implications + +⚠️ **Critical Understanding**: LoadIndices prioritizes **performance over consistency** by making several architectural trade-offs. Understanding these implications is crucial before using LoadIndices in production environments. + +### 🚨 Key Trade-offs Made + +#### **1. BYPASS Kafka/MCL Event Pipeline** + +- **What**: LoadIndices completely bypasses Kafka MCL (Metadata Change Log) topics that normally propagate all metadata changes +- **Architecture**: `Database → LoadIndices → Elasticsearch` **vs** normal flow of `Database → Kafka MCL → Multiple Consumers → Elasticsearch/Graph/etc` +- **Impact**: **No MCL events published** - downstream systems lose visibility into metadata changes +- **Critical Implication**: + - **MCL-Dependent Analytics**: Won't have audit trail of metadata changes + - **Integrations**: External systems won't be notified of changes + - **Custom MCL Consumers**: Any custom consumers will miss these events entirely + - **✅ Graph Service**: WILL be updated (UpdateIndicesService handles graph indices) **⚠️ Only when Elasticsearch is used for graph storage** + +#### **2. 
BROKEN DataHub Event Architecture** + +- **What**: Violates DataHub's core design principle that "all metadata changes flow through Kafka MCL" +- **Normal Flow**: `Metadata Change → MCL Event → Kafka → Multiple Consumers → Various Stores` +- **LoadIndices Flow**: `Metadata Change → LoadIndices → Direct ES Write` (**Skips Kafka entirely**) + +#### **3. READ_UNCOMMITTED Isolation** + +- **What**: Uses `TxIsolation.READ_UNCOMMITTED` for faster database scanning +- **Impact**: May read **uncommitted changes** or **dirty reads** from concurrent transactions +- **Implication**: Data consistency not guaranteed during active writes to database + +#### **4. Refresh Interval Manipulation** + +- **What**: Automatically disables refresh intervals during bulk operations +- **Impact**: **Recent updates may not be immediately searchable** +- **Implication**: Users won't see real-time updates in search until refresh intervals are restored + +#### **5. No Write Concurrency Controls** + +- **What**: No coordination with concurrent Elasticsearch writes from live ingestion +- **Impact**: **Potential conflicts** with active ingestion pipelines +- **Implication**: Concurrent writes may cause data inconsistency or operation failures + +### ⚠️ When NOT to Use LoadIndices + +**❌ DO NOT use LoadIndices if you have:** + +- **Active ingestion pipelines** writing to Elasticsearch simultaneously +- **MCL-dependent systems** that need event notifications +- **Neo4j-based graph storage** (graph updates will be missing) +- **Real-time search requirements** during the loading process +- **Production traffic** that requires immediate search consistency + +### ✅ When LoadIndices is Appropriate + +**✅ Safe to use LoadIndices when:** + +- **Fresh deployment** with empty Elasticsearch cluster +- **Offline migration** with no concurrent users +- **Standalone indexing** without DataHub services running +- **Read-only replica environments** with no active writes +- **Development/testing** environments +- 
**Disaster recovery** scenarios where faster restoration is prioritized +- **Independent cluster setup** where you need to populate indices before services start +- **Elasticsearch-based graph storage** (graph gets updated automatically) + +### 🔒 Safety Requirements + +Before using LoadIndices in any environment: + +1. **Verify Minimal Infrastructure**: + + - **Database**: MySQL/PostgreSQL with `metadata_aspect_v2` table accessible (via Ebean ORM) + - **Elasticsearch**: Running cluster accessible via HTTP/HTTPS + - **DataHub Services**: ✅ **NOT required** - LoadIndices can run independently + - **⚠️ Check Graph Storage**: Verify if using Elasticsearch-based graph storage + - **⚠️ Check Database Type**: Confirm NOT using Cassandra (not supported) + +2. **Stop All Ingestion** (if DataHub is running): + + ```bash + # Disable all Kafka consumers + kubectl scale deployment --replicas=0 datahub-mae-consumer + kubectl scale deployment --replicas=0 datahub-mce-consumer + kubectl scale deployment --replicas=0 datahub-gms + ``` + +3. **Check Database Configuration**: + + ```bash + # Check if using Cassandra (LoadIndices NOT supported) + grep -i cassandra /path/to/datahub/docker/docker-compose.yml + + # Verify MySQL/PostgreSQL database is configured + grep -E "mysql|postgres" /path/to/datahub/docker/docker-compose.yml + + # ⚠️ If Cassandra detected, LoadIndices is NOT available + # Must use RestoreIndices instead + ``` + +4. **Check Graph Storage Configuration**: + + ```bash + # Check if using Neo4j (graph updates will be MISSING) + grep -r "neo4j" /path/to/datahub/docker/docker-compose.yml + + # Check DataHub configuration for graph service selection + grep -i "graph.*elasticsearch\|neo4j" /path/to/datahub/conf/application.yml + + # ⚠️ If Neo4j is detected, LoadIndices will NOT update graph + ``` + +5.
**Verify No Concurrent Writes**: + + ```bash + # Check for active Elasticsearch indexing + curl -s "localhost:9200/_nodes/stats" | grep "index_current" + # Should show "index_current": 0 + ``` + +6. **Index Clean State**: + + ```bash + # Ensure clean indexing state + curl -s "localhost:9200/_nodes/stats" | grep -E "refresh.*active" + ``` + +7. **Coordinate with Operations**: + - **Maintenance window** scheduling + - **User notification** of search unavailability + - **Monitoring** of downstream system dependencies + +### 📊 Consistency Guarantees + +| Level | LoadIndices | RestoreIndices | +| --------------------------------------- | ------------- | ------------------- | +| **URN-level Ordering** | ✅ Guaranteed | ✅ Guaranteed | +| **Real-time Searchability** | ❌ Delayed | ✅ Immediate | +| **Graph Service Updates (ES-based)** | ✅ Updated | ✅ Updated | +| **Graph Service Updates (Neo4j-based)** | ❌ Missing | ✅ Updated | +| **MCL Event Propagation** | ❌ Bypassed | ✅ Full propagation | +| **Concurrent Write Safety** | ❌ Not safe | ✅ Safe | + +#### **2. Restore Normal Operations** + +- **Re-enable ingestion** pipelines gradually +- **Monitor Elasticsearch** for conflicts +- **Validate downstream systems** are synchronized + +#### **3. Emergency Rollback Plan** + +```bash +# If issues arise, prepare rollback: +# 1. Stop LoadIndices immediately +# 2. Restore from backup indices +# 3. Re-run with RestoreIndices for correctness +``` + +--- + +## How LoadIndices Works + +LoadIndices operates as an upgrade task that can run **independently** without requiring DataHub services to be running. It consists of two main steps: + +1. **BuildIndicesStep**: Creates and configures Elasticsearch indices (creates indices if they don't exist) +2. 
**LoadIndicesStep**: Streams aspects from database and bulk loads them into indices + +### 🔧 Independent Operation Mode + +**Key Advantage**: LoadIndices only requires: + +- ✅ **MySQL/PostgreSQL** source database (via Ebean ORM) +- ✅ **Elasticsearch/OpenSearch** destination cluster +- ❌ **No DataHub services** (maui, frontend, etc.) required +- ❌ **Cassandra**: ⚠️ **NOT supported** (Ebean doesn't support Cassandra) + +This enables **offline bulk operations** during maintenance windows or initial deployments where DataHub infrastructure is being set up incrementally. + +**Index Creation**: The BuildIndicesStep automatically creates all required Elasticsearch indices based on `IndexConvention` patterns, so empty Elasticsearch clusters are fully supported. + +### Architecture Flow + +```mermaid +graph TD + A[LoadIndices Upgrade] --> B[BuildIndicesStep] + B --> C[Create/Configure Indices] + C --> D[LoadIndicesStep] + D --> E[Disable Refresh Intervals] + E --> F[Stream Aspects from DB] + F --> G[Batch Processing] + G --> H[Convert to MCL Events] + H --> I[Bulk Write to ES] + I --> J[Restore Refresh Intervals] +``` + +### Key Differences from RestoreIndices + +| Aspect | RestoreIndices | LoadIndices | +| ---------------------- | ----------------------------- | -------------------------- | +| **Purpose** | Correctness & consistency | Speed & throughput | +| **Processing** | Event-driven via MCL events | Direct bulk operations | +| **Isolation** | READ_COMMITTED | READ_UNCOMMITTED | +| **Refresh Management** | Static configuration | Dynamic disable/restore | +| **Performance Focus** | Accurate replay | Maximal speed | +| **Use Case** | Recovery from inconsistencies | Initial loads & migrations | + +--- + +## Deployment & Execution + +### 🚀 Standalone Deployment Advantage + +**Key Benefit**: LoadIndices can run with **minimal infrastructure** without requiring DataHub services to be running: + +```bash +# Minimal requirements +✅ MySQL/PostgreSQL database (with 
metadata_aspect_v2 table) +✅ Elasticsearch/OpenSearch cluster +❌ DataHub GMS/Maui services - NOT needed +❌ Kafka cluster - NOT needed +❌ Frontend services - NOT needed +``` + +### 🔧 Execution Methods + +LoadIndices can be executed via: + +1. **Gradle Task** (Recommended) + +```bash +# From datahub-upgrade directory +./gradlew runLoadIndices + +# With custom thread count +./gradlew runLoadIndices -PesThreadCount=6 +``` + +2. **IDE Execution**: Run `UpgradeTask.main()` with LoadIndices arguments + +3. **Standalone JAR**: Build and run datahub-upgrade JAR independently + +--- + +## LoadIndices Configuration Options + +### 🔄 Performance & Throttling + +| Argument | Description | Default | Example | +| ----------- | ------------------------------------------ | ------------------------------ | ------------------- | +| `batchSize` | Number of aspects per batch for processing | `10000` | `-a batchSize=5000` | +| `limit` | Maximum number of aspects to process | `Integer.MAX_VALUE` (no limit) | `-a limit=50000` | + +### 📅 Time Filtering + +| Argument | Description | Example | +| -------------- | --------------------------------------------------------------------- | ------------------------------- | +| `gePitEpochMs` | Only process aspects created **after** this timestamp (milliseconds) | `-a gePitEpochMs=1609459200000` | +| `lePitEpochMs` | Only process aspects created **before** this timestamp (milliseconds) | `-a lePitEpochMs=1640995200000` | + +### 🔍 Content Filtering + +| Argument | Description | Example | +| ------------- | ----------------------------------------------- | ----------------------------------------- | +| `urnLike` | SQL LIKE pattern to filter URNs | `-a urnLike=urn:li:dataset:%` | +| `aspectNames` | Comma-separated list of aspect names to process | `-a aspectNames=ownership,schemaMetadata` | +| `lastUrn` | Resume processing from this URN (inclusive) | `-a lastUrn=urn:li:dataset:my-dataset` | + +### ⚙️ System Configuration + +| Environment Variable | 
Description | Default | Example | +| ---------------------------- | --------------------------------------- | ----------------------------------- | ------------------------------ | +| `ELASTICSEARCH_THREAD_COUNT` | Number of I/O threads for BulkProcessor | `2` (app config), `4` (Gradle task) | `ELASTICSEARCH_THREAD_COUNT=4` | +| `ES_BULK_ASYNC` | Enable asynchronous bulk operations | `true` | `ES_BULK_ASYNC=true` | +| `ES_BULK_REQUESTS_LIMIT` | Maximum bulk requests per buffer | `10000` | `ES_BULK_REQUESTS_LIMIT=15000` | +| `ES_BULK_FLUSH_PERIOD` | Bulk flush interval in seconds | `300` (5 minutes) | `ES_BULK_FLUSH_PERIOD=300` | + +--- + +## Running LoadIndices + +### 🐳 Docker Compose + +If you're using Docker Compose with the DataHub source repository: + +```bash +# Basic LoadIndices execution +./docker/datahub-upgrade/datahub-upgrade.sh -u LoadIndices + +# LoadIndices with performance tuning +./docker/datahub-upgrade/datahub-upgrade.sh -u LoadIndices \ + -a batchSize=15000 \ + -a limit=100000 +``` + +### 🎯 Gradle Task (Development) + +For development and testing environments: + +```bash +# Run LoadIndices with default settings +./gradlew :datahub-upgrade:runLoadIndices + +# Run with custom thread count and batch size +./gradlew :datahub-upgrade:runLoadIndices \ + -PesThreadCount=4 \ + -PbatchSize=15000 \ + -Plimit=50000 +``` + +The Gradle task supports these parameters: + +- `esThreadCount`: Set `ELASTICSEARCH_THREAD_COUNT` (default: `4`) +- `batchSize`: Override batch size (default: `10000`) +- `limit`: Set processing limit +- `urnLike`: Filter by URN pattern +- `aspectNames`: Filter by aspect names +- `lePitEpochMs`: Process records created before this timestamp +- `gePitEpochMs`: Process records created after this timestamp +- `lastUrn`: Resume processing from this URN (inclusive) + +### 🐳 Docker Environment Variables + +Configure LoadIndices through Docker environment: + +```bash +# Target specific entity types +docker run --rm datahub-upgrade \ + -u 
LoadIndices \ + -a urnLike=urn:li:dataset:% \ + -a batchSize=20000 + +# Process specific aspects only +docker run --rm datahub-upgrade \ + -u LoadIndices \ + -a aspectNames=ownership,status,schemaMetadata \ + -a batchSize=15000 + +# Time-based filtering +docker run --rm datahub-upgrade \ + -u LoadIndices \ + -a gePitEpochMs=1640995200000 \ + -a limit=50000 + +# Resume from a specific URN +docker run --rm datahub-upgrade \ + -u LoadIndices \ + -a lastUrn=urn:li:dataset:my-dataset \ + -a batchSize=10000 +``` + +### 🔄 Resume Functionality + +LoadIndices supports resuming from a specific URN when processing is interrupted: + +#### **Resume from Last Processed URN** + +When LoadIndices runs, it logs the last URN processed in each batch: + +``` +Batch completed - Last URN processed: urn:li:dataset:my-dataset +Processed 10000 aspects - 150.2 aspects/sec - Last URN: urn:li:dataset:my-dataset +``` + +To resume from where you left off: + +```bash +# Resume from the last URN that was successfully processed +./gradlew :datahub-upgrade:runLoadIndices \ + -a lastUrn=urn:li:dataset:my-dataset \ + -a batchSize=10000 +``` + +#### **Resume Best Practices** + +- **Use the exact URN**: Copy the URN exactly as logged (including any URL encoding) +- **Inclusive processing**: The `lastUrn` parameter processes from the specified URN onwards (inclusive) +- **Monitor progress**: Watch the logs for the "Last URN processed" messages to track progress +- **Batch boundaries**: Resume works at the URN level, not batch level - some aspects may be reprocessed + +#### **Example Resume Workflow** + +```bash +# 1. Start initial processing +./gradlew :datahub-upgrade:runLoadIndices -a batchSize=5000 + +# 2. If interrupted, check logs for last URN: +# "Batch completed - Last URN processed: urn:li:dataset:my-dataset" + +# 3. 
Resume from that URN +./gradlew :datahub-upgrade:runLoadIndices \ + -a lastUrn=urn:li:dataset:my-dataset \ + -a batchSize=5000 +``` + +--- + +## Performance Optimization + +### 🚀 Elasticsearch/OpenSearch Configuration + +#### Bulk Processing Tuning + +```bash +# Optimize bulk settings for LoadIndices +export ES_BULK_REQUESTS_LIMIT=15000 +export ES_BULK_FLUSH_PERIOD=10 +export ES_BULK_ASYNC=true +export ELASTICSEARCH_THREAD_COUNT=4 +``` + +#### Connection Pool Optimization + +LoadIndices automatically configures connection pooling based on thread count: + +```yaml +# datahub-upgrade/build.gradle configuration +environment "ELASTICSEARCH_THREAD_COUNT", "4" # Auto-adjusts maxConnectionsPerRoute +``` + +## Comparison with RestoreIndices + +Understanding when to use LoadIndices vs RestoreIndices is crucial for optimal performance and data consistency. + +### 🎯 Purpose & Design Philosophy + +| Aspect | RestoreIndices | LoadIndices | +| --------------------- | ------------------------------ | ------------------------------- | +| **Primary Purpose** | Data consistency & correctness | Speed & throughput | +| **Design Philosophy** | Event-driven precision | Performance optimization | +| **Consistency Model** | Full consistency guarantee | Speed-optimized trade-offs | +| **Use Case** | Production recovery | Bulk migrations & initial loads | + +### 📊 Technical Comparison + +| Feature | RestoreIndices | LoadIndices | +| --------------------------------- | ---------------------------- | ------------------------ | +| **Database Isolation** | READ_COMMITTED | READ_UNCOMMITTED | +| **MCL Events** | ✅ Full MCL pipeline | ❌ Bypasses MCL entirely | +| **Graph Updates (Elasticsearch)** | ✅ Updated | ✅ Updated | +| **Graph Updates (Neo4j)** | ✅ Updated | ❌ Missing | +| **Database Support** | MySQL, PostgreSQL, Cassandra | MySQL, PostgreSQL only | +| **Performance** | Slower, safer | Faster, optimized | +| **Real-time Consistency** | ✅ Immediate | ❌ Delayed until refresh | +| 
**Concurrency Safety** | ✅ Safe | ❌ Not safe | + +### 🚀 When to Use Each Tool + +#### ✅ **Use RestoreIndices For:** + +- **Production recovery** from inconsistencies +- **Neo4j-based graph storage** deployments +- **Cassandra-based** metadata storage +- **Active ingestion** pipelines running +- **MCL-dependent systems** requiring event notifications +- **Precise event replay** scenarios + +#### ✅ **Use LoadIndices For:** + +- **Fresh deployments** with empty clusters +- **Bulk migrations** during maintenance windows +- **MySQL/PostgreSQL + Elasticsearch** configurations +- **Offline scenarios** with no concurrent writes +- **Development/testing** environments +- **Performance-critical** initial data loads diff --git a/docs/how/restore-indices.md b/docs/how/restore-indices.md index f7d22bbb511f..63b8ef78c712 100644 --- a/docs/how/restore-indices.md +++ b/docs/how/restore-indices.md @@ -279,7 +279,9 @@ Implementing these expanded best practices should help ensure a smoother, more e minimizing impact on your DataHub environment. This operation can be I/O intensive from the read-side from SQL and on the Elasticsearch write side. If you're able to leverage -provisioned I/O. or throughput, you might want to monitor your infrastructure for a possible. +provisioned I/O or throughput, you might want to monitor your infrastructure for a possible bottleneck. + +> 💡 **Performance Tip**: For bulk loading scenarios during initial deployment or major data migrations, consider using [LoadIndices](./load-indices.md) instead, which is optimized for high throughput rather than precise event replay. 
#### Elasticsearch/Opensearch Optimization diff --git a/docs/managed-datahub/release-notes/v_0_3_14.md b/docs/managed-datahub/release-notes/v_0_3_14.md index 4fb6e246e55f..db6b3c436522 100644 --- a/docs/managed-datahub/release-notes/v_0_3_14.md +++ b/docs/managed-datahub/release-notes/v_0_3_14.md @@ -21,6 +21,20 @@ This contains detailed release notes, but there's also an [announcement blog pos ## Release Changelog +### v0.3.14.1-acryl + +This is a maintenance release including all of the changes from v0.3.14 along with the following fixes and changes: + +Minor Fixes: + +- Invite Users: Improvements to Recommended Invitations UX and result quality +- Observe: UX and Performance improvements to health dashboard +- Improvements to response quality in the Ask DataHub chatbot + +Critical Fixes: + +- Forms Reporting source saw increase in errors in v0.3.14 + ### v0.3.14-acryl New Features: diff --git a/metadata-ingestion-modules/airflow-plugin/build.gradle b/metadata-ingestion-modules/airflow-plugin/build.gradle index a28767f60367..c6aabaa0c625 100644 --- a/metadata-ingestion-modules/airflow-plugin/build.gradle +++ b/metadata-ingestion-modules/airflow-plugin/build.gradle @@ -31,8 +31,9 @@ task environmentSetup(type: Exec) { def sentinel_file = "${venv_name}/.venv_environment_sentinel" inputs.file file('setup.py') outputs.file(sentinel_file) - commandLine 'bash', '-c', - "${python_executable} -m venv ${venv_name} && set -x && " + + def venv_copies_flag = System.getenv('DATAHUB_VENV_USE_COPIES') == 'true' ?
'--copies' : '' + commandLine 'bash', '-c', + "${python_executable} -m venv ${venv_copies_flag} ${venv_name} && set -x && " + "${venv_name}/bin/python -m pip install --upgrade uv && " + "touch ${sentinel_file}" } diff --git a/metadata-ingestion-modules/dagster-plugin/build.gradle b/metadata-ingestion-modules/dagster-plugin/build.gradle index 96fdc4c03f55..3b84d94b4b48 100644 --- a/metadata-ingestion-modules/dagster-plugin/build.gradle +++ b/metadata-ingestion-modules/dagster-plugin/build.gradle @@ -23,8 +23,9 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) { def sentinel_file = "${venv_name}/.venv_environment_sentinel" inputs.file file('setup.py') outputs.file(sentinel_file) + def venv_copies_flag = System.getenv('DATAHUB_VENV_USE_COPIES') == 'true' ? '--copies' : '' commandLine 'bash', '-c', - "${python_executable} -m venv ${venv_name} && " + + "${python_executable} -m venv ${venv_copies_flag} ${venv_name} && " + "${venv_name}/bin/python -m pip install --upgrade uv && " + "touch ${sentinel_file}" } diff --git a/metadata-ingestion-modules/gx-plugin/build.gradle b/metadata-ingestion-modules/gx-plugin/build.gradle index 4a0d50a818da..f630bc86494d 100644 --- a/metadata-ingestion-modules/gx-plugin/build.gradle +++ b/metadata-ingestion-modules/gx-plugin/build.gradle @@ -23,8 +23,9 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) { def sentinel_file = "${venv_name}/.venv_environment_sentinel" inputs.file file('setup.py') outputs.file(sentinel_file) + def venv_copies_flag = System.getenv('DATAHUB_VENV_USE_COPIES') == 'true' ? 
'--copies' : '' commandLine 'bash', '-c', - "${python_executable} -m venv ${venv_name} && " + + "${python_executable} -m venv ${venv_copies_flag} ${venv_name} && " + "${venv_name}/bin/pip install --upgrade uv && " + "touch ${sentinel_file}" } diff --git a/metadata-ingestion-modules/prefect-plugin/build.gradle b/metadata-ingestion-modules/prefect-plugin/build.gradle index 2c4dd3610e2a..5874875c0633 100644 --- a/metadata-ingestion-modules/prefect-plugin/build.gradle +++ b/metadata-ingestion-modules/prefect-plugin/build.gradle @@ -23,8 +23,9 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) { def sentinel_file = "${venv_name}/.venv_environment_sentinel" inputs.file file('setup.py') outputs.file(sentinel_file) + def venv_copies_flag = System.getenv('DATAHUB_VENV_USE_COPIES') == 'true' ? '--copies' : '' commandLine 'bash', '-c', - "${python_executable} -m venv ${venv_name} && " + + "${python_executable} -m venv ${venv_copies_flag} ${venv_name} && " + "${venv_name}/bin/python -m pip install --upgrade uv && " + "touch ${sentinel_file}" } diff --git a/metadata-ingestion/CLAUDE.md b/metadata-ingestion/CLAUDE.md index 087dbccf9712..26d785feb0e6 100644 --- a/metadata-ingestion/CLAUDE.md +++ b/metadata-ingestion/CLAUDE.md @@ -36,6 +36,17 @@ pytest tests/path/to/file.py::TestClass # Single test class pytest tests/path/to/file.py::TestClass::test_method # Single test ``` +## Environment Variables + +**Build configuration:** + +- `DATAHUB_VENV_USE_COPIES`: Set to `true` to use `--copies` flag when creating Python virtual environments. This copies the Python binary instead of creating a symlink. Useful for Nix environments, immutable filesystems, Windows, or container environments where symlinks don't work correctly. Increases disk usage and setup time, so only enable if needed. 
+ + ```bash + export DATAHUB_VENV_USE_COPIES=true + ../gradlew :metadata-ingestion:installDev + ``` + ## Directory Structure - `src/datahub/`: Source code for the DataHub CLI and ingestion framework diff --git a/metadata-ingestion/build.gradle b/metadata-ingestion/build.gradle index 0876babe67c3..acffc4909f2f 100644 --- a/metadata-ingestion/build.gradle +++ b/metadata-ingestion/build.gradle @@ -24,8 +24,9 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) { def sentinel_file = "${venv_name}/.venv_environment_sentinel" inputs.file file('setup.py') outputs.file(sentinel_file) + def venv_copies_flag = System.getenv('DATAHUB_VENV_USE_COPIES') == 'true' ? '--copies' : '' commandLine 'bash', '-c', - "if [ ! -d ${venv_name} ] || [ ! -f ${venv_name}/bin/python ]; then ${python_executable} -m venv ${venv_name}; fi && " + + "if [ ! -d ${venv_name} ] || [ ! -f ${venv_name}/bin/python ]; then ${python_executable} -m venv ${venv_copies_flag} ${venv_name}; fi && " + "set -x && " + // If we already have uv available, use it to upgrade uv. Otherwise, install it with pip. "if [ ! -f ${venv_name}/bin/uv ]; then ${venv_name}/bin/python -m pip install --upgrade uv; else ${venv_name}/bin/python -m uv pip install --upgrade uv; fi && " + diff --git a/metadata-ingestion/developing.md b/metadata-ingestion/developing.md index b9219c7d9d55..e784964c5391 100644 --- a/metadata-ingestion/developing.md +++ b/metadata-ingestion/developing.md @@ -105,6 +105,22 @@ datahub version # should print "DataHub CLI version: unavailable (installed in Common issues (click to expand): +
+ Virtual environment creation fails with symlink errors (Nix, immutable filesystems, Windows) + +If you're using Nix, an immutable Python installation, Windows with certain filesystem configurations, or working in container environments where symlinks don't work correctly, you may encounter errors during virtual environment creation. + +You can enable the `--copies` flag for Python's venv by setting an environment variable before running the gradle commands: + +```shell +export DATAHUB_VENV_USE_COPIES=true +../gradlew :metadata-ingestion:installDev +``` + +This copies the Python binary instead of creating a symlink. Note that this increases disk usage and setup time, so only enable it if you're experiencing issues with the default symlink-based approach. + +
+
datahub command not found with PyPI install diff --git a/metadata-ingestion/docs/sources/metadata-file/file_recipe.yml b/metadata-ingestion/docs/sources/metadata-file/file_recipe.yml index e825505aebf0..7cc405959059 100644 --- a/metadata-ingestion/docs/sources/metadata-file/file_recipe.yml +++ b/metadata-ingestion/docs/sources/metadata-file/file_recipe.yml @@ -2,7 +2,7 @@ source: type: file config: # Coordinates - filename: ./path/to/mce/file.json + path: ./path/to/mce/file.json sink: - # sink configs \ No newline at end of file + # sink configs diff --git a/metadata-ingestion/docs/sources/snowflake/snowflake_pre.md b/metadata-ingestion/docs/sources/snowflake/snowflake_pre.md index b3261f7444af..839fa7da7934 100644 --- a/metadata-ingestion/docs/sources/snowflake/snowflake_pre.md +++ b/metadata-ingestion/docs/sources/snowflake/snowflake_pre.md @@ -3,7 +3,7 @@ In order to execute this source, your Snowflake user will need to have specific privileges granted to it for reading metadata from your warehouse. -Snowflake system admin can follow this guide to create a DataHub-specific role, assign it the required privileges, and assign it to a new DataHub user by executing the following Snowflake commands from a user with the `ACCOUNTADMIN` role or `MANAGE GRANTS` privilege. +A Snowflake system admin can follow this guide to create a DataHub-specific role, assign it the required privileges, and assign it to a new DataHub user by executing the following Snowflake commands from a user with the `ACCOUNTADMIN` role or `MANAGE GRANTS` privilege. ```sql create or replace role datahub_role; @@ -48,28 +48,28 @@ grant role datahub_role to user datahub_user; grant imported privileges on database snowflake to role datahub_role; ``` -The details of each granted privilege can be viewed in [snowflake docs](https://docs.snowflake.com/en/user-guide/security-access-control-privileges.html). 
A summarization of each privilege, and why it is required for this connector: +The details of each granted privilege can be viewed in the [Snowflake docs](https://docs.snowflake.com/en/user-guide/security-access-control-privileges.html). A summary of each privilege and why it is required for this connector: - `operate` is required only to start the warehouse. If the warehouse is already running during ingestion or has auto-resume enabled, this permission is not required. -- `usage` is required for us to run queries using the warehouse -- `usage` on `database` and `schema` are required because without it tables, views, and streams inside them are not accessible. If an admin does the required grants on `table` but misses the grants on `schema` or the `database` in which the table/view/stream exists then we will not be able to get metadata for the table/view/stream. -- If metadata is required only on some schemas then you can grant the usage privileges only on a particular schema like +- `usage` is required to run queries using the warehouse +- `usage` on `database` and `schema` are required because without them, tables, views, and streams inside them are not accessible. If an admin does the required grants on `table` but misses the grants on `schema` or the `database` in which the table/view/stream exists, then we will not be able to get metadata for the table/view/stream. +- If metadata is required only on some schemas, then you can grant the usage privileges only on a particular schema like: ```sql grant usage on schema ""."" to role datahub_role; ``` -- `select` on `streams` is required in order for stream definitions to be available. This does not allow selecting of the data (not required) unless the underlying dataset has select access as well. +- `select` on `streams` is required for stream definitions to be available. This does not allow selecting the data (not required) unless the underlying dataset has select access as well. 
```sql grant usage on schema ""."" to role datahub_role; ``` -This represents the bare minimum privileges required to extract databases, schemas, views, tables from Snowflake. +This represents the bare minimum privileges required to extract databases, schemas, views, and tables from Snowflake. -If you plan to enable extraction of table lineage, via the `include_table_lineage` config flag, extraction of usage statistics, via the `include_usage_stats` config, or extraction of tags (without lineage), via the `extract_tags` config, you'll also need to grant access to the [Account Usage](https://docs.snowflake.com/en/sql-reference/account-usage.html) system tables, using which the DataHub source extracts information. This can be done by granting access to the `snowflake` database. +If you plan to enable extraction of table lineage via the `include_table_lineage` config flag, extraction of usage statistics via the `include_usage_stats` config, or extraction of tags (without lineage) via the `extract_tags` config, you'll also need to grant access to the [Account Usage](https://docs.snowflake.com/en/sql-reference/account-usage.html) system tables from which the DataHub source extracts information. This can be done by granting access to the `snowflake` database. ```sql grant imported privileges on database snowflake to role datahub_role; @@ -83,13 +83,13 @@ Alternatively, other authentication methods are supported via the `authenticatio #### Key Pair Authentication -To set up Key Pair authentication, follow the three steps in [this guide](https://docs.snowflake.com/en/user-guide/key-pair-auth#configuring-key-pair-authentication) +To set up Key Pair authentication, follow the three steps in [this guide](https://docs.snowflake.com/en/user-guide/key-pair-auth#configuring-key-pair-authentication): - Generate the private key - Generate the public key -- Assign the public key to datahub user to be configured in recipe. 
+- Assign the public key to the DataHub user to be configured in the recipe. -Pass in the following values in recipe config instead of password, ensuring the private key maintains proper PEM format with line breaks at the beginning, end, and approximately every 64 characters within the key: +Pass in the following values in the recipe config instead of a password, ensuring the private key maintains proper PEM format with line breaks at the beginning, end, and approximately every 64 characters within the key: ```yml authentication_type: KEY_PAIR_AUTHENTICATOR @@ -111,7 +111,7 @@ Pass in the following values, as described in the article, for your recipe's `oa - `authority_url`: `` - `scopes`: The list of your _Okta_ scopes, i.e. with the `session:role:` prefix -Datahub only supports two OAuth grant types: `client_credentials` and `password`. +DataHub only supports two OAuth grant types: `client_credentials` and `password`. The steps slightly differ based on which you decide to use. ##### Client Credentials Grant Type (Simpler) @@ -121,7 +121,7 @@ The steps slightly differ based on which you decide to use. - Note your `Client ID` - Create a Snowflake user to correspond to your newly created Okta client credentials - _Ensure the user's `Login Name` matches your Okta application's `Client ID`_ - - Ensure the user has been granted your datahub role + - Ensure the user has been granted your DataHub role ##### Password Grant Type @@ -131,19 +131,19 @@ The steps slightly differ based on which you decide to use. 
- Create an Okta user to sign into, noting the `Username` and `Password` - Create a Snowflake user to correspond to your newly created Okta client credentials - _Ensure the user's `Login Name` matches your Okta user's `Username` (likely an email)_ - - Ensure the user has been granted your datahub role + - Ensure the user has been granted your DataHub role - When running ingestion, provide the required `oauth_config` fields, including `client_id` and `client_secret`, plus your Okta user's `Username` and `Password` - Note: the `username` and `password` config options are not nested under `oauth_config` ### Snowflake Shares -If you are using [Snowflake Shares](https://docs.snowflake.com/en/user-guide/data-sharing-provider) to share data across different snowflake accounts, and you have set up DataHub recipes for ingesting metadata from all these accounts, you may end up having multiple similar dataset entities corresponding to virtual versions of same table in different snowflake accounts. DataHub Snowflake connector can automatically link such tables together through Siblings and Lineage relationship if user provides information necessary to establish the relationship using configuration `shares` in recipe. +If you are using [Snowflake Shares](https://docs.snowflake.com/en/user-guide/data-sharing-provider) to share data across different Snowflake accounts, and you have set up DataHub recipes for ingesting metadata from all these accounts, you may end up having multiple similar dataset entities corresponding to virtual versions of the same table in different Snowflake accounts. The DataHub Snowflake connector can automatically link such tables together through Siblings and Lineage relationships if the user provides information necessary to establish the relationship using the `shares` configuration in the recipe. #### Example - Snowflake account `account1` (ingested as platform_instance `instance1`) owns a database `db1`. 
A share `X` is created in `account1` that includes database `db1` along with schemas and tables inside it. -- Now, `X` is shared with snowflake account `account2` (ingested as platform_instance `instance2`). A database `db1_from_X` is created from inbound share `X` in `account2`. In this case, all tables and views included in share `X` will also be present in `instance2`.`db1_from_X`. +- Now, `X` is shared with Snowflake account `account2` (ingested as platform_instance `instance2`). A database `db1_from_X` is created from inbound share `X` in `account2`. In this case, all tables and views included in share `X` will also be present in `instance2.db1_from_X`. - This can be represented in `shares` configuration section as ```yaml shares: @@ -154,11 +154,37 @@ If you are using [Snowflake Shares](https://docs.snowflake.com/en/user-guide/dat - database: db1_from_X platform_instance: instance2 ``` -- If share `X` is shared with more snowflake accounts and database is created from share `X` in those account then additional entries need to be added in `consumers` list for share `X`, one per snowflake account. The same `shares` config can then be copied across recipes of all accounts. +- If share `X` is shared with more Snowflake accounts and a database is created from share `X` in those accounts, then additional entries need to be added to the `consumers` list for share `X`, one per Snowflake account. The same `shares` config can then be copied across recipes for all accounts. 
+ +### Lineage and Usage + +DataHub supports two strategies for extracting lineage and usage information from Snowflake: + +#### New Strategy (Default - `use_queries_v2: true`) + +The default and recommended approach uses an optimized query extraction method that: + +- **Better Performance**: Fetches query logs in a single optimized query instead of multiple separate queries +- **Enhanced Features**: + - Query entities generation (`include_queries`) + - Query popularity statistics (`include_query_usage_statistics`) + - User filtering with patterns (`pushdown_deny_usernames`, `pushdown_allow_usernames`) + - Database pattern pushdown for performance (`push_down_database_pattern_access_history`) + - Query deduplication strategies (`query_dedup_strategy`) + +#### Legacy Strategy (`use_queries_v2: false`) + +The older approach that will be deprecated in future versions: + +- Uses separate extractors for lineage and usage +- Less performant due to multiple query executions +- Limited feature support compared to the new strategy + +Both strategies access the same Snowflake system tables (`account_usage.query_history`, `account_usage.access_history`), but the new strategy provides significant performance improvements and additional functionality. ### Caveats - Some of the features are only available in the Snowflake Enterprise Edition. This includes dynamic tables, advanced lineage features, and tags. This doc has notes mentioning where this applies. - Dynamic tables require the `monitor` privilege for metadata extraction. Without this privilege, dynamic tables will not be visible to DataHub. - The underlying Snowflake views that we use to get metadata have a [latency of 45 minutes to 3 hours](https://docs.snowflake.com/en/sql-reference/account-usage.html#differences-between-account-usage-and-information-schema). So we would not be able to get very recent metadata in some cases like queries you ran within that time period etc. 
This is applicable particularly for lineage, usage and tags (without lineage) extraction. -- If there is any [incident going on for Snowflake](https://status.snowflake.com/) we will not be able to get the metadata until that incident is resolved. +- If there is any [ongoing Snowflake incident](https://status.snowflake.com/), we will not be able to get the metadata until that incident is resolved. diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 2360d0a8a635..9078e92b207d 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -369,7 +369,12 @@ } databricks_common = { - "databricks-sqlalchemy~=1.0", # Note: This is pinned to 1.0 for compatibility with SQLAlchemy 1.x which is default for fivetran + # Version 2.4.0 includes sqlalchemy dialect, 2.8.0 includes some bug fixes + # Version 3.0.0 required SQLAlchemy > 2.0.21 + # TODO: When upgrading to >=3.0.0, remove proxy authentication monkey patching + # in src/datahub/ingestion/source/unity/proxy.py (_patch_databricks_sql_proxy_auth) + # as the fix was included natively in 3.0.0 via https://github.com/databricks/databricks-sql-python/pull/354 + "databricks-sql-connector>=2.8.0,<3.0.0", } databricks = { @@ -378,12 +383,6 @@ "databricks-sdk>=0.30.0", "pyspark~=3.5.6", "requests", - # Version 2.4.0 includes sqlalchemy dialect, 2.8.0 includes some bug fixes - # Version 3.0.0 required SQLAlchemy > 2.0.21 - # TODO: When upgrading to >=3.0.0, remove proxy authentication monkey patching - # in src/datahub/ingestion/source/unity/proxy.py (_patch_databricks_sql_proxy_auth) - # as the fix was included natively in 3.0.0 via https://github.com/databricks/databricks-sql-python/pull/354 - "databricks-sql-connector>=2.8.0,<3.0.0", # Due to https://github.com/databricks/databricks-sql-python/issues/326 # databricks-sql-connector<3.0.0 requires pandas<2.2.0 "pandas<2.2.0", @@ -593,9 +592,9 @@ ), "powerbi-report-server": powerbi_report_server, "vertica": sql_common | 
{"vertica-sqlalchemy-dialect[vertica-python]==0.0.8.2"}, - "unity-catalog": databricks | sql_common, + "unity-catalog": databricks_common | databricks | sql_common, # databricks is alias for unity-catalog and needs to be kept in sync - "databricks": databricks | sql_common, + "databricks": databricks_common | databricks | sql_common, "fivetran": snowflake_common | bigquery_common | databricks_common diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py index 4e9798e812cc..af0f862e8a21 100644 --- a/metadata-ingestion/src/datahub/configuration/common.py +++ b/metadata-ingestion/src/datahub/configuration/common.py @@ -173,11 +173,11 @@ class ConnectionModel(BaseModel): """Represents the config associated with a connection""" class Config: - if PYDANTIC_VERSION_2: # noqa: SIM108 + if PYDANTIC_VERSION_2: extra = "allow" else: extra = Extra.allow - underscore_attrs_are_private = True + underscore_attrs_are_private = True class TransformerSemantics(ConfigEnum): diff --git a/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py b/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py new file mode 100644 index 000000000000..31913b15358f --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py @@ -0,0 +1,87 @@ +import logging +from typing import TYPE_CHECKING, Iterable, List + +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.metadata.schema_classes import InputFieldClass, InputFieldsClass + +if TYPE_CHECKING: + from datahub.ingestion.api.source import SourceReport + +logger = logging.getLogger(__name__) + + +class ValidateInputFieldsProcessor: + def __init__(self, report: "SourceReport"): + self.report = report + + def validate_input_fields( + self, + stream: Iterable[MetadataWorkUnit], + ) -> Iterable[MetadataWorkUnit]: + """ + Validate input fields and 
filter out invalid ones. + + Invalid input fields have empty or missing fieldPath values, which would cause + URN generation to fail when sent to the server. This processor filters them out + and reports them as warnings. + """ + for wu in stream: + input_fields_aspect = wu.get_aspect_of_type(InputFieldsClass) + if input_fields_aspect and input_fields_aspect.fields: + valid_fields: List[InputFieldClass] = [] + invalid_count = 0 + + for input_field in input_fields_aspect.fields: + if ( + input_field.schemaField + and input_field.schemaField.fieldPath + and input_field.schemaField.fieldPath.strip() + ): + valid_fields.append(input_field) + else: + invalid_count += 1 + + if invalid_count > 0: + logger.debug( + f"Filtered {invalid_count} invalid input field(s) with empty fieldPath for {wu.get_urn()}" + ) + self.report.num_input_fields_filtered += invalid_count + self.report.warning( + title="Invalid input fields filtered", + message="Input fields with empty fieldPath values were filtered out to prevent ingestion errors", + context=f"Filtered {invalid_count} invalid input field(s) for {wu.get_urn()}", + ) + + # Update the aspect with only valid fields + if valid_fields: + input_fields_aspect.fields = valid_fields + else: + # If no valid fields remain, skip this workunit entirely + logger.debug( + f"All input fields were invalid for {wu.get_urn()}, skipping InputFieldsClass workunit" + ) + # Don't yield this workunit + continue + + yield wu + + def _remove_input_fields_aspect(self, wu: MetadataWorkUnit) -> MetadataWorkUnit: + """Remove InputFieldsClass aspect from a workunit.""" + # For MCPs, we can simply not yield the aspect + # For MCEs, we need to remove it from the snapshot + if hasattr(wu.metadata, "aspect") and isinstance( + wu.metadata.aspect, InputFieldsClass + ): + # This is an MCP with InputFieldsClass, skip it + return wu + + if hasattr(wu.metadata, "proposedSnapshot"): + snapshot = wu.metadata.proposedSnapshot + if hasattr(snapshot, "aspects"): + 
snapshot.aspects = [ + aspect + for aspect in snapshot.aspects + if not isinstance(aspect, InputFieldsClass) + ] + + return wu diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index 1e30a4300d4c..70367dea8ea8 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -31,6 +31,9 @@ from datahub.ingestion.api.auto_work_units.auto_ensure_aspect_size import ( EnsureAspectSizeProcessor, ) +from datahub.ingestion.api.auto_work_units.auto_validate_input_fields import ( + ValidateInputFieldsProcessor, +) from datahub.ingestion.api.closeable import Closeable from datahub.ingestion.api.common import PipelineContext, RecordEnvelope, WorkUnit from datahub.ingestion.api.report import ExamplesReport, Report @@ -215,6 +218,7 @@ class SourceReport(ExamplesReport, IngestionStageReport): event_not_produced_warn: bool = True events_produced: int = 0 events_produced_per_sec: int = 0 + num_input_fields_filtered: int = 0 _structured_logs: StructuredLogs = field(default_factory=StructuredLogs) @@ -543,6 +547,7 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: browse_path_processor, partial(auto_workunit_reporter, self.get_report()), auto_patch_last_modified, + ValidateInputFieldsProcessor(self.get_report()).validate_input_fields, EnsureAspectSizeProcessor(self.get_report()).ensure_aspect_size, ] diff --git a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/config.py b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/config.py index 323326e31619..c9d8ef7833f9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/config.py @@ -13,8 +13,9 @@ ) from datahub.ingestion.source.aws.aws_common import AwsConnectionConfig from datahub.ingestion.source.aws.s3_util import is_s3_uri -from 
datahub.ingestion.source.state.stateful_ingestion_base import ( +from datahub.ingestion.source.state.stale_entity_removal_handler import ( StatefulIngestionConfigBase, + StatefulStaleMetadataRemovalConfig, ) # hide annoying debug errors from py4j @@ -39,9 +40,7 @@ class S3(ConfigModel): class DeltaLakeSourceConfig( - PlatformInstanceConfigMixin, - EnvConfigMixin, - StatefulIngestionConfigBase, + PlatformInstanceConfigMixin, EnvConfigMixin, StatefulIngestionConfigBase ): base_path: str = Field( description="Path to table (s3 or local file system). If path is not a delta table path " @@ -80,6 +79,11 @@ class DeltaLakeSourceConfig( s3: Optional[S3] = Field(None) + stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field( + default=None, + description="Stateful Ingestion Config with stale metadata removal", + ) + @cached_property def is_s3(self): return is_s3_uri(self.base_path or "") diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py index 19155c9f8a5f..90780f7f847d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py @@ -92,6 +92,7 @@ def parse_obj(cls, data: Dict[str, Any]) -> "Dashboard": """Custom parsing to handle nested panel extraction.""" dashboard_data = data.get("dashboard", {}) _panel_data = dashboard_data.get("panels", []) + panels = [] try: panels = cls.extract_panels(_panel_data) except Exception as e: @@ -108,6 +109,10 @@ def parse_obj(cls, data: Dict[str, Any]) -> "Dashboard": if "meta" in dashboard_dict: del dashboard_dict["meta"] + # Handle refresh field type mismatch - convert boolean to string + if "refresh" in dashboard_dict and isinstance(dashboard_dict["refresh"], bool): + dashboard_dict["refresh"] = str(dashboard_dict["refresh"]) + return super().parse_obj(dashboard_dict) diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py index 2308a6ab6c57..c79e612056a9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -214,6 +214,10 @@ class ModeConfig( description="Number of items per page for paginated API requests.", ) + exclude_archived: bool = Field( + default=False, description="Exclude archived reports" + ) + @validator("connect_uri") def remove_trailing_slash(cls, v): return config_clean.remove_trailing_slashes(v) @@ -1473,6 +1477,15 @@ def _get_reports(self, space_token: str) -> Iterator[List[dict]]: logger.debug( f"Read {len(reports_page)} reports records from workspace {self.workspace_uri} space {space_token}" ) + if self.config.exclude_archived: + logger.debug( + f"Excluding archived reports since exclude_archived: {self.config.exclude_archived}" + ) + reports_page = [ + report + for report in reports_page + if not report.get("archived", False) + ] yield reports_page except ModeRequestError as e: if isinstance(e, HTTPError) and e.response.status_code == 404: diff --git a/metadata-ingestion/src/datahub/ingestion/source_report/ingestion_stage.py b/metadata-ingestion/src/datahub/ingestion/source_report/ingestion_stage.py index 0c211263f57e..132ccb85ad34 100644 --- a/metadata-ingestion/src/datahub/ingestion/source_report/ingestion_stage.py +++ b/metadata-ingestion/src/datahub/ingestion/source_report/ingestion_stage.py @@ -4,7 +4,6 @@ from dataclasses import dataclass, field from datetime import datetime, timezone from enum import Enum -from typing import Tuple from datahub.utilities.perf_timer import PerfTimer from datahub.utilities.stats_collections import TopKDict @@ -38,9 +37,7 @@ class IngestionStageReport: ingestion_high_stage_seconds: dict[IngestionHighStage, float] = field( default_factory=lambda: defaultdict(float) ) - ingestion_stage_durations: TopKDict[Tuple[IngestionHighStage, str], 
float] = field( - default_factory=TopKDict - ) + ingestion_stage_durations: TopKDict[str, float] = field(default_factory=TopKDict) def new_stage( self, stage: str, high_stage: IngestionHighStage = IngestionHighStage._UNDEFINED @@ -81,9 +78,9 @@ def __exit__(self, exc_type, exc_val, exc_tb): f"Time spent in stage <{self._ingestion_stage}>: {elapsed} seconds", stacklevel=2, ) - self._report.ingestion_stage_durations[ - (self._high_stage, self._ingestion_stage) - ] = elapsed + # Store tuple as string to avoid serialization errors + key = f"({self._high_stage.value}, {self._ingestion_stage})" + self._report.ingestion_stage_durations[key] = elapsed else: logger.info( f"Time spent in stage <{self._high_stage.value}>: {elapsed} seconds", diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py index 33ffcd752e62..587e51699dcf 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py @@ -40,9 +40,6 @@ def _get_dialect_str(platform: str) -> str: # let the fuzzy resolution logic handle it. # MariaDB is a fork of MySQL, so we reuse the same dialect. return "mysql, normalization_strategy = lowercase" - # Dremio is based upon drill. 
Not 100% compatibility - elif platform == "dremio": - return "drill" else: return platform diff --git a/metadata-ingestion/tests/integration/dremio/dremio_mces_golden.json b/metadata-ingestion/tests/integration/dremio/dremio_mces_golden.json index e83034a58223..c16a3a53c186 100644 --- a/metadata-ingestion/tests/integration/dremio/dremio_mces_golden.json +++ b/metadata-ingestion/tests/integration/dremio/dremio_mces_golden.json @@ -15,7 +15,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -31,7 +31,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -49,7 +49,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -65,7 +65,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -85,7 +85,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -105,7 +105,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -121,7 +121,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -139,7 +139,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": 
"dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -155,7 +155,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -175,7 +175,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -195,7 +195,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -211,7 +211,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -229,7 +229,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -245,7 +245,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -265,7 +265,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -285,7 +285,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -301,7 +301,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" 
} }, @@ -319,7 +319,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -335,7 +335,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -355,7 +355,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -375,7 +375,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -391,7 +391,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -409,7 +409,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -425,7 +425,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -445,7 +445,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -465,7 +465,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -481,7 +481,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": 
"dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -497,7 +497,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -515,7 +515,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -531,7 +531,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -555,7 +555,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -575,7 +575,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -591,7 +591,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -607,7 +607,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -625,7 +625,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -641,7 +641,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" 
} }, @@ -665,7 +665,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -685,7 +685,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -701,7 +701,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -717,7 +717,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -735,7 +735,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -751,7 +751,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -775,7 +775,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -795,7 +795,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -811,7 +811,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -827,7 +827,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": 
"dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -845,7 +845,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -861,7 +861,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -885,7 +885,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -905,7 +905,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -921,7 +921,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -937,7 +937,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -955,7 +955,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -971,7 +971,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -995,7 +995,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" 
} }, @@ -1015,7 +1015,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1031,7 +1031,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1047,7 +1047,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1065,7 +1065,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1081,7 +1081,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1105,7 +1105,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1125,7 +1125,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1141,7 +1141,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1157,7 +1157,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1175,7 +1175,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": 
"dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1191,7 +1191,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1215,7 +1215,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1235,7 +1235,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1251,7 +1251,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1267,7 +1267,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1285,7 +1285,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1301,7 +1301,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1325,7 +1325,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1345,7 +1345,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": 
"no-run-id-provided" } }, @@ -1361,7 +1361,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1377,7 +1377,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1395,7 +1395,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1411,7 +1411,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1439,7 +1439,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1459,7 +1459,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1475,7 +1475,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1491,7 +1491,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1509,7 +1509,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1525,7 +1525,7 @@ }, "systemMetadata": { "lastObserved": 
1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1553,7 +1553,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1573,7 +1573,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1589,7 +1589,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1605,7 +1605,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1623,7 +1623,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1639,7 +1639,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1671,7 +1671,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1691,7 +1691,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1707,7 +1707,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": 
"dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1723,7 +1723,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1741,7 +1741,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1757,7 +1757,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1793,7 +1793,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1817,7 +1817,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1835,7 +1835,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1851,7 +1851,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1867,7 +1867,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1885,7 +1885,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -1915,7 +1915,7 @@ }, 
"fields": [ { - "fieldPath": "D", + "fieldPath": "G", "nullable": true, "type": { "type": { @@ -1939,7 +1939,7 @@ "isPartOfKey": false }, { - "fieldPath": "G", + "fieldPath": "A", "nullable": true, "type": { "type": { @@ -1951,7 +1951,7 @@ "isPartOfKey": false }, { - "fieldPath": "H", + "fieldPath": "B", "nullable": true, "type": { "type": { @@ -1963,7 +1963,7 @@ "isPartOfKey": false }, { - "fieldPath": "I", + "fieldPath": "C", "nullable": true, "type": { "type": { @@ -1987,7 +1987,7 @@ "isPartOfKey": false }, { - "fieldPath": "A", + "fieldPath": "D", "nullable": true, "type": { "type": { @@ -1999,7 +1999,7 @@ "isPartOfKey": false }, { - "fieldPath": "B", + "fieldPath": "I", "nullable": true, "type": { "type": { @@ -2011,7 +2011,7 @@ "isPartOfKey": false }, { - "fieldPath": "C", + "fieldPath": "H", "nullable": true, "type": { "type": { @@ -2027,7 +2027,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2043,7 +2043,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2067,7 +2067,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2091,7 +2091,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2109,7 +2109,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2125,7 +2125,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": 
"dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2141,7 +2141,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2159,7 +2159,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2189,19 +2189,19 @@ }, "fields": [ { - "fieldPath": "first_name", + "fieldPath": "id", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "integer(32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "last_name", + "fieldPath": "email_address", "nullable": true, "type": { "type": { @@ -2213,19 +2213,19 @@ "isPartOfKey": false }, { - "fieldPath": "id", + "fieldPath": "first_name", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "integer(32)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "company", + "fieldPath": "last_name", "nullable": true, "type": { "type": { @@ -2237,26 +2237,26 @@ "isPartOfKey": false }, { - "fieldPath": "priority", + "fieldPath": "company", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "float(24)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "email_address", + "fieldPath": "priority", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character 
varying(65536)", + "nativeDataType": "float(24)", "recursive": false, "isPartOfKey": false } @@ -2265,7 +2265,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2281,7 +2281,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2309,7 +2309,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2333,7 +2333,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2351,7 +2351,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2367,7 +2367,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2383,7 +2383,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2401,7 +2401,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2431,43 +2431,43 @@ }, "fields": [ { - "fieldPath": "version", + "fieldPath": "metadata", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - 
"nativeDataType": "bigint(64)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "metadata", + "fieldPath": "version", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "bigint(64)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "createdon", + "fieldPath": "aspect", "nullable": true, "type": { "type": { - "com.linkedin.schema.DateType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "timestamp(23)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "createdby", + "fieldPath": "urn", "nullable": true, "type": { "type": { @@ -2479,7 +2479,7 @@ "isPartOfKey": false }, { - "fieldPath": "createdfor", + "fieldPath": "createdby", "nullable": true, "type": { "type": { @@ -2491,7 +2491,7 @@ "isPartOfKey": false }, { - "fieldPath": "urn", + "fieldPath": "createdfor", "nullable": true, "type": { "type": { @@ -2503,14 +2503,14 @@ "isPartOfKey": false }, { - "fieldPath": "aspect", + "fieldPath": "createdon", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.DateType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "timestamp(23)", "recursive": false, "isPartOfKey": false } @@ -2519,7 +2519,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2535,7 +2535,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2563,7 +2563,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + 
"runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2587,7 +2587,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2605,7 +2605,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2621,7 +2621,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2637,7 +2637,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2655,7 +2655,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2773,7 +2773,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2789,7 +2789,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2817,7 +2817,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2841,7 +2841,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2859,7 
+2859,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2875,7 +2875,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2891,7 +2891,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2909,7 +2909,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -2939,19 +2939,19 @@ }, "fields": [ { - "fieldPath": "id", + "fieldPath": "doubleVal", "nullable": true, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bigint(64)", + "nativeDataType": "double(53)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "urn", + "fieldPath": "path", "nullable": true, "type": { "type": { @@ -2963,7 +2963,7 @@ "isPartOfKey": false }, { - "fieldPath": "path", + "fieldPath": "urn", "nullable": true, "type": { "type": { @@ -2975,14 +2975,14 @@ "isPartOfKey": false }, { - "fieldPath": "doubleVal", + "fieldPath": "id", "nullable": true, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "double(53)", + "nativeDataType": "bigint(64)", "recursive": false, "isPartOfKey": false } @@ -2991,7 +2991,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3007,7 +3007,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": 
"dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3035,7 +3035,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3059,7 +3059,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3077,7 +3077,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3093,7 +3093,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3109,7 +3109,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3127,7 +3127,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3197,7 +3197,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3213,7 +3213,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3241,7 +3241,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3265,7 +3265,7 @@ }, 
"systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3283,7 +3283,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3299,7 +3299,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3315,7 +3315,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3333,7 +3333,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3415,7 +3415,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3431,7 +3431,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3459,7 +3459,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3483,7 +3483,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3501,7 +3501,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + 
"runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3517,7 +3517,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3533,7 +3533,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3615,7 +3615,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3631,7 +3631,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3656,7 +3656,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3680,7 +3680,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3704,7 +3704,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3722,7 +3722,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3738,7 +3738,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3754,7 
+3754,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3784,7 +3784,7 @@ }, "fields": [ { - "fieldPath": "aspect", + "fieldPath": "createdby", "nullable": true, "type": { "type": { @@ -3796,14 +3796,14 @@ "isPartOfKey": false }, { - "fieldPath": "version", + "fieldPath": "createdon", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.DateType": {} } }, - "nativeDataType": "bigint(64)", + "nativeDataType": "timestamp(23)", "recursive": false, "isPartOfKey": false }, @@ -3820,19 +3820,19 @@ "isPartOfKey": false }, { - "fieldPath": "createdon", + "fieldPath": "version", "nullable": true, "type": { "type": { - "com.linkedin.schema.DateType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "timestamp(23)", + "nativeDataType": "bigint(64)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "createdby", + "fieldPath": "aspect", "nullable": true, "type": { "type": { @@ -3844,7 +3844,7 @@ "isPartOfKey": false }, { - "fieldPath": "createdfor", + "fieldPath": "urn", "nullable": true, "type": { "type": { @@ -3856,7 +3856,7 @@ "isPartOfKey": false }, { - "fieldPath": "urn", + "fieldPath": "createdfor", "nullable": true, "type": { "type": { @@ -3872,7 +3872,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3888,7 +3888,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3913,7 +3913,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": 
"no-run-id-provided" } }, @@ -3941,7 +3941,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3965,7 +3965,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3983,7 +3983,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -3999,7 +3999,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4015,7 +4015,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4133,7 +4133,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4149,7 +4149,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4174,7 +4174,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4202,7 +4202,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4226,7 +4226,7 @@ }, "systemMetadata": { "lastObserved": 
1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4244,7 +4244,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4260,7 +4260,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4276,7 +4276,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4318,38 +4318,38 @@ "isPartOfKey": false }, { - "fieldPath": "doubleVal", + "fieldPath": "id", "nullable": true, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "double(53)", + "nativeDataType": "bigint(64)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "path", + "fieldPath": "doubleVal", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "double(53)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "id", + "fieldPath": "path", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "bigint(64)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false } @@ -4358,7 +4358,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4374,7 +4374,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": 
"dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4399,7 +4399,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4427,7 +4427,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4451,7 +4451,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4469,7 +4469,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4485,7 +4485,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4501,7 +4501,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4531,31 +4531,31 @@ }, "fields": [ { - "fieldPath": "priority", + "fieldPath": "id", "nullable": true, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "float(24)", + "nativeDataType": "integer(32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "id", + "fieldPath": "company", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "integer(32)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "company", + 
"fieldPath": "last_name", "nullable": true, "type": { "type": { @@ -4567,7 +4567,7 @@ "isPartOfKey": false }, { - "fieldPath": "last_name", + "fieldPath": "first_name", "nullable": true, "type": { "type": { @@ -4579,7 +4579,7 @@ "isPartOfKey": false }, { - "fieldPath": "first_name", + "fieldPath": "email_address", "nullable": true, "type": { "type": { @@ -4591,14 +4591,14 @@ "isPartOfKey": false }, { - "fieldPath": "email_address", + "fieldPath": "priority", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "float(24)", "recursive": false, "isPartOfKey": false } @@ -4607,7 +4607,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4623,7 +4623,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4648,7 +4648,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4676,7 +4676,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4700,7 +4700,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4718,7 +4718,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4734,7 +4734,7 @@ }, 
"systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4750,7 +4750,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4780,7 +4780,7 @@ }, "fields": [ { - "fieldPath": "id", + "fieldPath": "customer_id", "nullable": true, "type": { "type": { @@ -4792,26 +4792,26 @@ "isPartOfKey": false }, { - "fieldPath": "customer_id", + "fieldPath": "description", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "integer(32)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "description", + "fieldPath": "id", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "integer(32)", "recursive": false, "isPartOfKey": false } @@ -4820,7 +4820,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4836,7 +4836,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4861,7 +4861,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4889,7 +4889,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", 
"lastRunId": "no-run-id-provided" } }, @@ -4913,7 +4913,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4931,7 +4931,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4947,7 +4947,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4963,7 +4963,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -4993,7 +4993,7 @@ }, "fields": [ { - "fieldPath": "H", + "fieldPath": "B", "nullable": true, "type": { "type": { @@ -5005,7 +5005,7 @@ "isPartOfKey": false }, { - "fieldPath": "G", + "fieldPath": "C", "nullable": true, "type": { "type": { @@ -5017,7 +5017,7 @@ "isPartOfKey": false }, { - "fieldPath": "F", + "fieldPath": "D", "nullable": true, "type": { "type": { @@ -5041,7 +5041,7 @@ "isPartOfKey": false }, { - "fieldPath": "D", + "fieldPath": "F", "nullable": true, "type": { "type": { @@ -5053,7 +5053,7 @@ "isPartOfKey": false }, { - "fieldPath": "C", + "fieldPath": "G", "nullable": true, "type": { "type": { @@ -5065,7 +5065,7 @@ "isPartOfKey": false }, { - "fieldPath": "B", + "fieldPath": "H", "nullable": true, "type": { "type": { @@ -5077,7 +5077,7 @@ "isPartOfKey": false }, { - "fieldPath": "A", + "fieldPath": "I", "nullable": true, "type": { "type": { @@ -5089,7 +5089,7 @@ "isPartOfKey": false }, { - "fieldPath": "I", + "fieldPath": "A", "nullable": true, "type": { "type": { @@ -5105,7 +5105,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": 
"dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5121,7 +5121,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5146,7 +5146,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5174,7 +5174,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5198,7 +5198,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5216,7 +5216,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5232,7 +5232,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5248,7 +5248,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5438,7 +5438,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5454,7 +5454,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": 
"no-run-id-provided" } }, @@ -5479,7 +5479,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5511,7 +5511,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5535,7 +5535,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5553,7 +5553,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5569,7 +5569,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5585,7 +5585,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5615,19 +5615,19 @@ }, "fields": [ { - "fieldPath": "DEPARTMENT_NAME", + "fieldPath": "LOCATION_ID", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "double(53)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "MANAGER_ID", + "fieldPath": "DEPARTMENT_ID", "nullable": true, "type": { "type": { @@ -5639,19 +5639,19 @@ "isPartOfKey": false }, { - "fieldPath": "DEPARTMENT_ID", + "fieldPath": "DEPARTMENT_NAME", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - 
"nativeDataType": "double(53)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "LOCATION_ID", + "fieldPath": "MANAGER_ID", "nullable": true, "type": { "type": { @@ -5667,7 +5667,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5683,7 +5683,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5708,7 +5708,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5740,7 +5740,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5764,7 +5764,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5782,7 +5782,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5798,7 +5798,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5814,7 +5814,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5844,7 +5844,7 @@ }, "fields": [ { - "fieldPath": "cp_catalog_page_number", + 
"fieldPath": "cp_catalog_page_sk", "nullable": true, "type": { "type": { @@ -5856,7 +5856,7 @@ "isPartOfKey": false }, { - "fieldPath": "cp_type", + "fieldPath": "cp_catalog_page_id", "nullable": true, "type": { "type": { @@ -5868,19 +5868,19 @@ "isPartOfKey": false }, { - "fieldPath": "cp_description", + "fieldPath": "cp_start_date_sk", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "bigint(64)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "cp_catalog_number", + "fieldPath": "cp_end_date_sk", "nullable": true, "type": { "type": { @@ -5904,7 +5904,7 @@ "isPartOfKey": false }, { - "fieldPath": "cp_end_date_sk", + "fieldPath": "cp_catalog_number", "nullable": true, "type": { "type": { @@ -5916,19 +5916,19 @@ "isPartOfKey": false }, { - "fieldPath": "cp_start_date_sk", + "fieldPath": "cp_description", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "bigint(64)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "cp_catalog_page_id", + "fieldPath": "cp_type", "nullable": true, "type": { "type": { @@ -5940,7 +5940,7 @@ "isPartOfKey": false }, { - "fieldPath": "cp_catalog_page_sk", + "fieldPath": "cp_catalog_page_number", "nullable": true, "type": { "type": { @@ -5956,7 +5956,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5972,7 +5972,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -5997,7 +5997,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": 
"dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -6037,7 +6037,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -6066,22 +6066,22 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),aspect)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),createdby)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_aspect,PROD),aspect)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_aspect,PROD),createdby)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),version)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),createdon)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_aspect,PROD),version)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_aspect,PROD),createdon)" ], "confidenceScore": 1.0 }, @@ -6099,44 +6099,44 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),createdon)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),version)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_aspect,PROD),createdon)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_aspect,PROD),version)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),createdby)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),aspect)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_aspect,PROD),createdby)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_aspect,PROD),aspect)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),createdfor)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),urn)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_aspect,PROD),createdfor)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_aspect,PROD),urn)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),urn)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),createdfor)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_aspect,PROD),urn)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_aspect,PROD),createdfor)" ], "confidenceScore": 1.0 } @@ -6145,7 +6145,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": 
"dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -6253,7 +6253,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -6293,33 +6293,33 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),doubleVal)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),id)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_index_view,PROD),doubleVal)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_index_view,PROD),id)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),path)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),doubleVal)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_index_view,PROD),path)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_index_view,PROD),doubleVal)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),path)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_index_view,PROD),id)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_index_view,PROD),path)" ], "confidenceScore": 1.0 } @@ -6328,7 +6328,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -6357,66 +6357,66 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD),priority)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD),id)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.customers,PROD),priority)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.customers,PROD),id)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD),id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD),company)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.customers,PROD),id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.customers,PROD),company)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD),company)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD),last_name)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.customers,PROD),company)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.customers,PROD),last_name)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD),last_name)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD),first_name)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.customers,PROD),last_name)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.customers,PROD),first_name)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD),first_name)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD),email_address)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.customers,PROD),first_name)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.customers,PROD),email_address)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD),email_address)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD),priority)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.customers,PROD),email_address)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.customers,PROD),priority)" ], "confidenceScore": 1.0 } @@ -6425,7 +6425,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": 
"dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -6454,33 +6454,33 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD),id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD),customer_id)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.orders,PROD),id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.orders,PROD),customer_id)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD),customer_id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD),description)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.orders,PROD),customer_id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.orders,PROD),description)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD),description)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD),id)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.orders,PROD),description)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.orders,PROD),id)" ], "confidenceScore": 1.0 } @@ -6489,7 +6489,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -6564,7 +6564,7 @@ }, "systemMetadata": { 
"lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -6738,7 +6738,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -6767,44 +6767,44 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/oracle-departments.xlsx,PROD),DEPARTMENT_NAME)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/oracle-departments.xlsx,PROD),LOCATION_ID)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD),DEPARTMENT_NAME)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD),LOCATION_ID)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/oracle-departments.xlsx,PROD),MANAGER_ID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/oracle-departments.xlsx,PROD),DEPARTMENT_ID)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD),MANAGER_ID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD),DEPARTMENT_ID)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/oracle-departments.xlsx,PROD),DEPARTMENT_ID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/oracle-departments.xlsx,PROD),DEPARTMENT_NAME)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD),DEPARTMENT_ID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD),DEPARTMENT_NAME)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/oracle-departments.xlsx,PROD),LOCATION_ID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/oracle-departments.xlsx,PROD),MANAGER_ID)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD),LOCATION_ID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD),MANAGER_ID)" ], "confidenceScore": 1.0 } @@ -6813,7 +6813,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -6842,33 +6842,33 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),H)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),B)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),H)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),B)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),G)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),C)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),G)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),C)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),F)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),D)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),F)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),D)" ], "confidenceScore": 1.0 }, @@ -6886,55 +6886,55 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),D)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),F)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),D)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),F)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),C)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),G)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),C)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),G)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),B)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),H)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),B)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),H)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),A)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),I)" ], 
"downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),A)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),I)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),I)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),A)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),I)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),A)" ], "confidenceScore": 1.0 } @@ -6943,7 +6943,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -6972,44 +6972,44 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_catalog_page_number)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_catalog_page_sk)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_catalog_page_number)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_catalog_page_sk)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_type)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_catalog_page_id)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_type)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_catalog_page_id)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_description)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_start_date_sk)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_description)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_start_date_sk)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_catalog_number)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_end_date_sk)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_catalog_number)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_end_date_sk)" ], "confidenceScore": 1.0 }, @@ -7027,44 +7027,44 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_end_date_sk)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_catalog_number)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_end_date_sk)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_catalog_number)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_start_date_sk)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_description)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_start_date_sk)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_description)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_catalog_page_id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_type)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_catalog_page_id)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_type)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_catalog_page_sk)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_catalog_page_number)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_catalog_page_sk)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_catalog_page_number)" ], "confidenceScore": 1.0 } @@ -7073,7 +7073,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7103,7 +7103,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7132,7 +7132,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7155,7 +7155,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" 
} }, @@ -7171,7 +7171,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7201,7 +7201,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7230,7 +7230,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7253,7 +7253,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7269,7 +7269,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7299,7 +7299,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7328,7 +7328,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7351,7 +7351,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7367,7 +7367,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7397,7 +7397,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": 
"dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7426,7 +7426,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7449,7 +7449,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7465,7 +7465,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7495,7 +7495,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7524,7 +7524,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7547,7 +7547,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7563,7 +7563,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7599,7 +7599,7 @@ "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.raw,PROD),id)" ], - "transformOperation": "COPY: `warehouse`.`id` AS `id`", + "transformOperation": "COPY: \"warehouse\".\"id\" AS \"id\"", "confidenceScore": 0.9, "query": 
"urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Cdremio.space.test_folder.raw%2CPROD%29" }, @@ -7612,7 +7612,7 @@ "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.raw,PROD),name)" ], - "transformOperation": "COPY: `warehouse`.`name` AS `name`", + "transformOperation": "COPY: \"warehouse\".\"name\" AS \"name\"", "confidenceScore": 0.9, "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Cdremio.space.test_folder.raw%2CPROD%29" }, @@ -7625,7 +7625,7 @@ "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.raw,PROD),age)" ], - "transformOperation": "COPY: `warehouse`.`age` AS `age`", + "transformOperation": "COPY: \"warehouse\".\"age\" AS \"age\"", "confidenceScore": 0.9, "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Cdremio.space.test_folder.raw%2CPROD%29" }, @@ -7638,7 +7638,7 @@ "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.raw,PROD),salary)" ], - "transformOperation": "COPY: `warehouse`.`salary` AS `salary`", + "transformOperation": "COPY: \"warehouse\".\"salary\" AS \"salary\"", "confidenceScore": 0.9, "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Cdremio.space.test_folder.raw%2CPROD%29" } @@ -7647,7 +7647,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7676,7 +7676,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7691,6 +7691,9 @@ { "entity": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.s3.warehouse,PROD)" }, + { + "entity": 
"urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.raw,PROD)" + }, { "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.s3.warehouse,PROD),age)" }, @@ -7703,9 +7706,6 @@ { "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.s3.warehouse,PROD),salary)" }, - { - "entity": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.raw,PROD)" - }, { "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.raw,PROD),id)" }, @@ -7723,7 +7723,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -7739,63 +7739,111 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.raw,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.warehouse,PROD)", "changeType": "UPSERT", - "aspectName": "datasetProfile", + "aspectName": "upstreamLineage", "aspect": { "json": { - "timestampMillis": 1697353200000, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 4, - "columnCount": 4, - "fieldProfiles": [ - { - "fieldPath": "salary", - "uniqueCount": 4, - "nullCount": 0, - "mean": "65000.0", - "stdev": "12909.944487358056" - }, + "upstreams": [ { - "fieldPath": "age", - "uniqueCount": 4, - "nullCount": 0, - "mean": "32.5", - "stdev": "6.454972243679028" - }, + "auditStamp": { + "time": 1697353200000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:dremio,samples.samples.dremio.com.nyc-weather.csv,PROD)", + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Cdremio.space.warehouse%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00-h45omw", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Cdremio.space.warehouse%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "customProperties": {}, + "statement": { + "value": "SELECT\n *\nFROM Samples.\"samples.dremio.com\".\"NYC-weather.csv\"", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1697353200000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00-h45omw", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Cdremio.space.warehouse%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ { - "fieldPath": "name", - "uniqueCount": 4, - "nullCount": 0 + "entity": "urn:li:dataset:(urn:li:dataPlatform:dremio,samples.samples.dremio.com.nyc-weather.csv,PROD)" }, { - "fieldPath": "id", - "uniqueCount": 4, - "nullCount": 0, - "mean": "2.5", - "stdev": "1.2909944487358056" + "entity": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.warehouse,PROD)" } ] } }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": 
"urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Cdremio.space.warehouse%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_index_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.metadata_aspect,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -7805,27 +7853,42 @@ "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" }, - "rowCount": 0, - "columnCount": 4, + "rowCount": 2, + "columnCount": 7, "fieldProfiles": [ { - "fieldPath": "urn", - "uniqueCount": 0, + "fieldPath": "metadata", + "uniqueCount": 2, "nullCount": 0 }, { - "fieldPath": "doubleVal", - "uniqueCount": 0, + "fieldPath": "version", + "uniqueCount": 1, "nullCount": 0 }, { - "fieldPath": "path", - "uniqueCount": 0, + "fieldPath": "aspect", + "uniqueCount": 2, "nullCount": 0 }, { - "fieldPath": "id", + "fieldPath": "urn", + "uniqueCount": 1, + "nullCount": 0 + }, + { + "fieldPath": "createdby", + "uniqueCount": 1, + "nullCount": 0 + }, + { + "fieldPath": "createdfor", "uniqueCount": 0, + "nullCount": 2 + }, + { + "fieldPath": "createdon", + "uniqueCount": 1, "nullCount": 0 } ] @@ -7833,13 +7896,13 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.s3.warehouse,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.orders,PROD)", "changeType": "UPSERT", "aspectName": 
"datasetProfile", "aspect": { @@ -7849,47 +7912,36 @@ "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" }, - "rowCount": 4, - "columnCount": 4, + "rowCount": 0, + "columnCount": 3, "fieldProfiles": [ { "fieldPath": "id", - "uniqueCount": 4, - "nullCount": 0, - "mean": "2.5", - "stdev": "1.2909944487358056" - }, - { - "fieldPath": "name", - "uniqueCount": 4, + "uniqueCount": 0, "nullCount": 0 }, { - "fieldPath": "age", - "uniqueCount": 4, - "nullCount": 0, - "mean": "32.5", - "stdev": "6.454972243679028" + "fieldPath": "description", + "uniqueCount": 0, + "nullCount": 0 }, { - "fieldPath": "salary", - "uniqueCount": 4, - "nullCount": 0, - "mean": "65000.0", - "stdev": "12909.944487358056" + "fieldPath": "customer_id", + "uniqueCount": 0, + "nullCount": 0 } ] } }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.warehouse,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.raw,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -7899,66 +7951,47 @@ "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" }, - "rowCount": 3834, - "columnCount": 9, + "rowCount": 4, + "columnCount": 4, "fieldProfiles": [ { - "fieldPath": "D", - "uniqueCount": 76, - "nullCount": 0 - }, - { - "fieldPath": "F", - "uniqueCount": 61, - "nullCount": 0 - }, - { - "fieldPath": "G", - "uniqueCount": 40, - "nullCount": 0 - }, - { - "fieldPath": "H", - "uniqueCount": 91, - "nullCount": 0 - }, - { - "fieldPath": "I", - "uniqueCount": 85, - "nullCount": 0 - }, - { - "fieldPath": "E", - "uniqueCount": 192, - "nullCount": 0 + "fieldPath": "salary", + "uniqueCount": 4, + "nullCount": 0, + "mean": "65000.0", + "stdev": "12909.944487358056" }, { - "fieldPath": "A", - "uniqueCount": 2, - "nullCount": 
0 + "fieldPath": "age", + "uniqueCount": 4, + "nullCount": 0, + "mean": "32.5", + "stdev": "6.454972243679028" }, { - "fieldPath": "B", - "uniqueCount": 2, + "fieldPath": "name", + "uniqueCount": 4, "nullCount": 0 }, { - "fieldPath": "C", - "uniqueCount": 3834, - "nullCount": 0 + "fieldPath": "id", + "uniqueCount": 4, + "nullCount": 0, + "mean": "2.5", + "stdev": "1.2909944487358056" } ] } }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.metadata_aspect,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_index,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -7968,42 +8001,42 @@ "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" }, - "rowCount": 2, + "rowCount": 0, "columnCount": 7, "fieldProfiles": [ { - "fieldPath": "version", - "uniqueCount": 1, + "fieldPath": "doubleVal", + "uniqueCount": 0, "nullCount": 0 }, { - "fieldPath": "metadata", - "uniqueCount": 2, + "fieldPath": "id", + "uniqueCount": 0, "nullCount": 0 }, { - "fieldPath": "createdon", - "uniqueCount": 1, + "fieldPath": "urn", + "uniqueCount": 0, "nullCount": 0 }, { - "fieldPath": "createdby", - "uniqueCount": 1, + "fieldPath": "aspect", + "uniqueCount": 0, "nullCount": 0 }, { - "fieldPath": "createdfor", + "fieldPath": "path", "uniqueCount": 0, - "nullCount": 2 + "nullCount": 0 }, { - "fieldPath": "urn", - "uniqueCount": 1, + "fieldPath": "longVal", + "uniqueCount": 0, "nullCount": 0 }, { - "fieldPath": "aspect", - "uniqueCount": 2, + "fieldPath": "stringVal", + "uniqueCount": 0, "nullCount": 0 } ] @@ -8011,13 +8044,13 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", 
"lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.orders,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.metadata_index_view,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -8028,20 +8061,25 @@ "type": "FULL_TABLE" }, "rowCount": 0, - "columnCount": 3, + "columnCount": 4, "fieldProfiles": [ { - "fieldPath": "id", + "fieldPath": "doubleVal", "uniqueCount": 0, "nullCount": 0 }, { - "fieldPath": "description", + "fieldPath": "path", "uniqueCount": 0, "nullCount": 0 }, { - "fieldPath": "customer_id", + "fieldPath": "urn", + "uniqueCount": 0, + "nullCount": 0 + }, + { + "fieldPath": "id", "uniqueCount": 0, "nullCount": 0 } @@ -8050,13 +8088,13 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.metadata_index_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.warehouse,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -8066,27 +8104,52 @@ "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" }, - "rowCount": 0, - "columnCount": 4, + "rowCount": 3834, + "columnCount": 9, "fieldProfiles": [ { - "fieldPath": "id", - "uniqueCount": 0, + "fieldPath": "G", + "uniqueCount": 40, "nullCount": 0 }, { - "fieldPath": "urn", - "uniqueCount": 0, + "fieldPath": "F", + "uniqueCount": 61, "nullCount": 0 }, { - "fieldPath": "path", - "uniqueCount": 0, + "fieldPath": "A", + "uniqueCount": 2, "nullCount": 0 }, { - "fieldPath": "doubleVal", - "uniqueCount": 0, + "fieldPath": "B", + "uniqueCount": 2, + "nullCount": 0 + }, + { + "fieldPath": "C", + "uniqueCount": 3834, + "nullCount": 0 + }, + { + "fieldPath": 
"E", + "uniqueCount": 192, + "nullCount": 0 + }, + { + "fieldPath": "D", + "uniqueCount": 76, + "nullCount": 0 + }, + { + "fieldPath": "I", + "uniqueCount": 85, + "nullCount": 0 + }, + { + "fieldPath": "H", + "uniqueCount": 91, "nullCount": 0 } ] @@ -8094,13 +8157,13 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.metadata_index,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.customers,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -8110,56 +8173,55 @@ "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" }, - "rowCount": 0, - "columnCount": 7, + "rowCount": 5, + "columnCount": 6, "fieldProfiles": [ { "fieldPath": "id", - "uniqueCount": 0, - "nullCount": 0 - }, - { - "fieldPath": "urn", - "uniqueCount": 0, - "nullCount": 0 + "uniqueCount": 5, + "nullCount": 0, + "mean": "3.0", + "stdev": "1.5811388300841898" }, { - "fieldPath": "aspect", - "uniqueCount": 0, + "fieldPath": "email_address", + "uniqueCount": 5, "nullCount": 0 }, { - "fieldPath": "path", - "uniqueCount": 0, + "fieldPath": "first_name", + "uniqueCount": 5, "nullCount": 0 }, { - "fieldPath": "longVal", - "uniqueCount": 0, + "fieldPath": "last_name", + "uniqueCount": 5, "nullCount": 0 }, { - "fieldPath": "stringVal", - "uniqueCount": 0, + "fieldPath": "company", + "uniqueCount": 5, "nullCount": 0 }, { - "fieldPath": "doubleVal", - "uniqueCount": 0, - "nullCount": 0 + "fieldPath": "priority", + "uniqueCount": 3, + "nullCount": 1, + "mean": "4.175000011920929", + "stdev": "0.4924429489953036" } ] } }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" 
} }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_index,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.customers,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -8169,56 +8231,55 @@ "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" }, - "rowCount": 0, - "columnCount": 7, + "rowCount": 5, + "columnCount": 6, "fieldProfiles": [ - { - "fieldPath": "doubleVal", - "uniqueCount": 0, - "nullCount": 0 - }, { "fieldPath": "id", - "uniqueCount": 0, - "nullCount": 0 + "uniqueCount": 5, + "nullCount": 0, + "mean": "3.0", + "stdev": "1.5811388300841898" }, { - "fieldPath": "urn", - "uniqueCount": 0, + "fieldPath": "company", + "uniqueCount": 5, "nullCount": 0 }, { - "fieldPath": "aspect", - "uniqueCount": 0, + "fieldPath": "last_name", + "uniqueCount": 5, "nullCount": 0 }, { - "fieldPath": "path", - "uniqueCount": 0, + "fieldPath": "first_name", + "uniqueCount": 5, "nullCount": 0 }, { - "fieldPath": "longVal", - "uniqueCount": 0, + "fieldPath": "email_address", + "uniqueCount": 5, "nullCount": 0 }, { - "fieldPath": "stringVal", - "uniqueCount": 0, - "nullCount": 0 + "fieldPath": "priority", + "uniqueCount": 3, + "nullCount": 1, + "mean": "4.175000011920929", + "stdev": "0.4924429489953036" } ] } }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.customers,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.s3.warehouse,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -8228,55 +8289,47 @@ "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" }, - "rowCount": 5, - "columnCount": 6, + "rowCount": 4, + "columnCount": 4, 
"fieldProfiles": [ - { - "fieldPath": "first_name", - "uniqueCount": 5, - "nullCount": 0 - }, - { - "fieldPath": "last_name", - "uniqueCount": 5, - "nullCount": 0 - }, { "fieldPath": "id", - "uniqueCount": 5, + "uniqueCount": 4, "nullCount": 0, - "mean": "3.0", - "stdev": "1.5811388300841898" + "mean": "2.5", + "stdev": "1.2909944487358056" }, { - "fieldPath": "company", - "uniqueCount": 5, + "fieldPath": "name", + "uniqueCount": 4, "nullCount": 0 }, { - "fieldPath": "priority", - "uniqueCount": 3, - "nullCount": 1, - "mean": "4.175000011920929", - "stdev": "0.4924429489953036" + "fieldPath": "age", + "uniqueCount": 4, + "nullCount": 0, + "mean": "32.5", + "stdev": "6.454972243679028" }, { - "fieldPath": "email_address", - "uniqueCount": 5, - "nullCount": 0 + "fieldPath": "salary", + "uniqueCount": 4, + "nullCount": 0, + "mean": "65000.0", + "stdev": "12909.944487358056" } ] } }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.customers,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_index_view,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -8286,41 +8339,66 @@ "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" }, - "rowCount": 5, - "columnCount": 6, + "rowCount": 0, + "columnCount": 4, "fieldProfiles": [ { - "fieldPath": "priority", - "uniqueCount": 3, - "nullCount": 1, - "mean": "4.175000011920929", - "stdev": "0.4924429489953036" + "fieldPath": "urn", + "uniqueCount": 0, + "nullCount": 0 }, { "fieldPath": "id", - "uniqueCount": 5, - "nullCount": 0, - "mean": "3.0", - "stdev": "1.5811388300841898" + "uniqueCount": 0, + "nullCount": 0 }, { - "fieldPath": "company", - "uniqueCount": 5, + "fieldPath": "doubleVal", + "uniqueCount": 0, 
"nullCount": 0 }, { - "fieldPath": "last_name", - "uniqueCount": 5, + "fieldPath": "path", + "uniqueCount": 0, + "nullCount": 0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00-h45omw", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.orders,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "json": { + "timestampMillis": 1697353200000, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "rowCount": 0, + "columnCount": 3, + "fieldProfiles": [ + { + "fieldPath": "customer_id", + "uniqueCount": 0, "nullCount": 0 }, { - "fieldPath": "first_name", - "uniqueCount": 5, + "fieldPath": "description", + "uniqueCount": 0, "nullCount": 0 }, { - "fieldPath": "email_address", - "uniqueCount": 5, + "fieldPath": "id", + "uniqueCount": 0, "nullCount": 0 } ] @@ -8328,13 +8406,13 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_aspect,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.space.test_folder.metadata_index,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -8344,42 +8422,42 @@ "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" }, - "rowCount": 2, + "rowCount": 0, "columnCount": 7, "fieldProfiles": [ { - "fieldPath": "aspect", - "uniqueCount": 2, + "fieldPath": "id", + "uniqueCount": 0, "nullCount": 0 }, { - "fieldPath": "version", - "uniqueCount": 1, + "fieldPath": "urn", + "uniqueCount": 0, "nullCount": 0 }, { - "fieldPath": "metadata", - "uniqueCount": 2, + "fieldPath": "aspect", + "uniqueCount": 0, "nullCount": 0 }, { - 
"fieldPath": "createdon", - "uniqueCount": 1, + "fieldPath": "path", + "uniqueCount": 0, "nullCount": 0 }, { - "fieldPath": "createdby", - "uniqueCount": 1, + "fieldPath": "longVal", + "uniqueCount": 0, "nullCount": 0 }, { - "fieldPath": "createdfor", + "fieldPath": "stringVal", "uniqueCount": 0, - "nullCount": 2 + "nullCount": 0 }, { - "fieldPath": "urn", - "uniqueCount": 1, + "fieldPath": "doubleVal", + "uniqueCount": 0, "nullCount": 0 } ] @@ -8387,13 +8465,13 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.northwind.orders,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,dremio.mysql.metagalaxy.metadata_aspect,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -8403,30 +8481,50 @@ "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" }, - "rowCount": 0, - "columnCount": 3, + "rowCount": 2, + "columnCount": 7, "fieldProfiles": [ { - "fieldPath": "id", - "uniqueCount": 0, + "fieldPath": "createdby", + "uniqueCount": 1, "nullCount": 0 }, { - "fieldPath": "customer_id", - "uniqueCount": 0, + "fieldPath": "createdon", + "uniqueCount": 1, "nullCount": 0 }, { - "fieldPath": "description", - "uniqueCount": 0, + "fieldPath": "metadata", + "uniqueCount": 2, + "nullCount": 0 + }, + { + "fieldPath": "version", + "uniqueCount": 1, + "nullCount": 0 + }, + { + "fieldPath": "aspect", + "uniqueCount": 2, + "nullCount": 0 + }, + { + "fieldPath": "urn", + "uniqueCount": 1, "nullCount": 0 + }, + { + "fieldPath": "createdfor", + "uniqueCount": 0, + "nullCount": 2 } ] } }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -8442,7 +8540,7 @@ }, "systemMetadata": 
{ "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -8458,7 +8556,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -8474,7 +8572,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -8490,7 +8588,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -8506,7 +8604,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } }, @@ -8522,7 +8620,23 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-q0irxp", + "runId": "dremio-2023_10_15-07_00_00-h45omw", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Cdremio.space.warehouse%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00-h45omw", "lastRunId": "no-run-id-provided" } } diff --git a/metadata-ingestion/tests/integration/dremio/dremio_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/dremio/dremio_platform_instance_mces_golden.json index 913524dfe958..4f67184ab46b 100644 --- a/metadata-ingestion/tests/integration/dremio/dremio_platform_instance_mces_golden.json +++ 
b/metadata-ingestion/tests/integration/dremio/dremio_platform_instance_mces_golden.json @@ -15,7 +15,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -32,7 +32,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -50,7 +50,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -66,7 +66,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -90,7 +90,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -110,7 +110,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -127,7 +127,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -145,7 +145,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -161,7 +161,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -185,7 +185,7 @@ }, "systemMetadata": 
{ "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -205,7 +205,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -222,7 +222,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -240,7 +240,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -256,7 +256,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -280,7 +280,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -300,7 +300,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -317,7 +317,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -335,7 +335,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -351,7 +351,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": 
"dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -375,7 +375,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -395,7 +395,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -412,7 +412,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -430,7 +430,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -446,7 +446,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -470,7 +470,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -490,7 +490,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -506,7 +506,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -523,7 +523,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -541,7 +541,7 @@ }, "systemMetadata": { 
"lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -557,7 +557,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -585,7 +585,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -605,7 +605,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -621,7 +621,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -638,7 +638,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -656,7 +656,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -672,7 +672,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -700,7 +700,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -720,7 +720,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": 
"dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -736,7 +736,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -753,7 +753,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -771,7 +771,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -787,7 +787,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -815,7 +815,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -835,7 +835,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -851,7 +851,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -868,7 +868,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -886,7 +886,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -902,7 +902,7 @@ }, "systemMetadata": { 
"lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -930,7 +930,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -950,7 +950,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -966,7 +966,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -983,7 +983,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1001,7 +1001,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1017,7 +1017,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1045,7 +1045,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1065,7 +1065,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1081,7 +1081,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": 
"dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1098,7 +1098,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1116,7 +1116,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1132,7 +1132,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1160,7 +1160,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1180,7 +1180,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1196,7 +1196,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1213,7 +1213,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1231,7 +1231,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1247,7 +1247,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1275,7 +1275,7 @@ }, 
"systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1295,7 +1295,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1311,7 +1311,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1328,7 +1328,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1346,7 +1346,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1362,7 +1362,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1390,7 +1390,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1410,7 +1410,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1426,7 +1426,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1443,7 +1443,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + 
"runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1461,7 +1461,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1477,7 +1477,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1509,7 +1509,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1529,7 +1529,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1545,7 +1545,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1562,7 +1562,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1580,7 +1580,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1596,7 +1596,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1628,7 +1628,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1648,7 
+1648,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1664,7 +1664,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1681,7 +1681,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1699,7 +1699,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1715,7 +1715,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1751,7 +1751,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1771,7 +1771,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1787,7 +1787,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1804,7 +1804,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1822,7 +1822,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": 
"dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1838,7 +1838,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1878,7 +1878,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1902,7 +1902,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1920,7 +1920,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1937,7 +1937,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1953,7 +1953,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -1971,7 +1971,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2001,7 +2001,7 @@ }, "fields": [ { - "fieldPath": "B", + "fieldPath": "A", "nullable": true, "type": { "type": { @@ -2013,7 +2013,7 @@ "isPartOfKey": false }, { - "fieldPath": "C", + "fieldPath": "I", "nullable": true, "type": { "type": { @@ -2025,7 +2025,7 @@ "isPartOfKey": false }, { - "fieldPath": "D", + "fieldPath": "H", "nullable": true, "type": { "type": { @@ -2037,7 +2037,7 @@ 
"isPartOfKey": false }, { - "fieldPath": "E", + "fieldPath": "G", "nullable": true, "type": { "type": { @@ -2061,7 +2061,7 @@ "isPartOfKey": false }, { - "fieldPath": "A", + "fieldPath": "E", "nullable": true, "type": { "type": { @@ -2073,7 +2073,7 @@ "isPartOfKey": false }, { - "fieldPath": "G", + "fieldPath": "D", "nullable": true, "type": { "type": { @@ -2085,7 +2085,7 @@ "isPartOfKey": false }, { - "fieldPath": "H", + "fieldPath": "C", "nullable": true, "type": { "type": { @@ -2097,7 +2097,7 @@ "isPartOfKey": false }, { - "fieldPath": "I", + "fieldPath": "B", "nullable": true, "type": { "type": { @@ -2113,7 +2113,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2129,7 +2129,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2157,7 +2157,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2181,7 +2181,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2199,7 +2199,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2216,7 +2216,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2232,7 +2232,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + 
"runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2250,7 +2250,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2280,31 +2280,31 @@ }, "fields": [ { - "fieldPath": "id", + "fieldPath": "priority", "nullable": true, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "integer(32)", + "nativeDataType": "float(24)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "company", + "fieldPath": "id", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "integer(32)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "last_name", + "fieldPath": "company", "nullable": true, "type": { "type": { @@ -2316,7 +2316,7 @@ "isPartOfKey": false }, { - "fieldPath": "first_name", + "fieldPath": "last_name", "nullable": true, "type": { "type": { @@ -2328,7 +2328,7 @@ "isPartOfKey": false }, { - "fieldPath": "email_address", + "fieldPath": "first_name", "nullable": true, "type": { "type": { @@ -2340,14 +2340,14 @@ "isPartOfKey": false }, { - "fieldPath": "priority", + "fieldPath": "email_address", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "float(24)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false } @@ -2356,7 +2356,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2372,7 +2372,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", 
"lastRunId": "no-run-id-provided" } }, @@ -2404,7 +2404,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2428,7 +2428,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2446,7 +2446,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2463,7 +2463,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2479,7 +2479,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2497,7 +2497,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2527,7 +2527,7 @@ }, "fields": [ { - "fieldPath": "urn", + "fieldPath": "aspect", "nullable": true, "type": { "type": { @@ -2539,7 +2539,7 @@ "isPartOfKey": false }, { - "fieldPath": "createdfor", + "fieldPath": "metadata", "nullable": true, "type": { "type": { @@ -2551,31 +2551,31 @@ "isPartOfKey": false }, { - "fieldPath": "createdby", + "fieldPath": "createdon", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.DateType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "timestamp(23)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "createdon", + "fieldPath": "createdfor", "nullable": 
true, "type": { "type": { - "com.linkedin.schema.DateType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "timestamp(23)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "metadata", + "fieldPath": "createdby", "nullable": true, "type": { "type": { @@ -2599,7 +2599,7 @@ "isPartOfKey": false }, { - "fieldPath": "aspect", + "fieldPath": "urn", "nullable": true, "type": { "type": { @@ -2615,7 +2615,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2631,7 +2631,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2663,7 +2663,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2687,7 +2687,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2705,7 +2705,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2722,7 +2722,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2738,7 +2738,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2756,7 +2756,7 @@ }, "systemMetadata": { 
"lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2786,31 +2786,31 @@ }, "fields": [ { - "fieldPath": "id", + "fieldPath": "stringVal", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "bigint(64)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "urn", + "fieldPath": "longVal", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "bigint(64)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "aspect", + "fieldPath": "path", "nullable": true, "type": { "type": { @@ -2822,31 +2822,31 @@ "isPartOfKey": false }, { - "fieldPath": "path", + "fieldPath": "id", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "bigint(64)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "longVal", + "fieldPath": "aspect", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "bigint(64)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "stringVal", + "fieldPath": "urn", "nullable": true, "type": { "type": { @@ -2874,7 +2874,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2890,7 +2890,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": 
"dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2922,7 +2922,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2946,7 +2946,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2964,7 +2964,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2981,7 +2981,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -2997,7 +2997,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3015,7 +3015,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3045,50 +3045,50 @@ }, "fields": [ { - "fieldPath": "doubleVal", + "fieldPath": "urn", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "double(53)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "id", + "fieldPath": "path", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "bigint(64)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "urn", + 
"fieldPath": "doubleVal", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "double(53)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "path", + "fieldPath": "id", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "bigint(64)", "recursive": false, "isPartOfKey": false } @@ -3097,7 +3097,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3113,7 +3113,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3145,7 +3145,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3169,7 +3169,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3187,7 +3187,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3204,7 +3204,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3220,7 +3220,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": 
"dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3238,7 +3238,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3268,26 +3268,26 @@ }, "fields": [ { - "fieldPath": "customer_id", + "fieldPath": "description", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "integer(32)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "description", + "fieldPath": "customer_id", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "integer(32)", "recursive": false, "isPartOfKey": false }, @@ -3308,7 +3308,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3324,7 +3324,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3356,7 +3356,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3380,7 +3380,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3398,7 +3398,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } 
}, @@ -3415,7 +3415,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3431,7 +3431,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3449,7 +3449,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3479,19 +3479,19 @@ }, "fields": [ { - "fieldPath": "age", + "fieldPath": "name", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "bigint(64)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "salary", + "fieldPath": "age", "nullable": true, "type": { "type": { @@ -3503,14 +3503,14 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "salary", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "bigint(64)", "recursive": false, "isPartOfKey": false }, @@ -3531,7 +3531,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3547,7 +3547,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3579,7 +3579,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", 
"lastRunId": "no-run-id-provided" } }, @@ -3603,7 +3603,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3621,7 +3621,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3638,7 +3638,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3654,7 +3654,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3736,7 +3736,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3752,7 +3752,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3777,7 +3777,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3805,7 +3805,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3829,7 +3829,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3847,7 +3847,7 @@ }, "systemMetadata": { "lastObserved": 
1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3864,7 +3864,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3880,7 +3880,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -3910,26 +3910,26 @@ }, "fields": [ { - "fieldPath": "version", + "fieldPath": "urn", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "bigint(64)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "createdon", + "fieldPath": "createdfor", "nullable": true, "type": { "type": { - "com.linkedin.schema.DateType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "timestamp(23)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, @@ -3946,14 +3946,14 @@ "isPartOfKey": false }, { - "fieldPath": "createdfor", + "fieldPath": "createdon", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.DateType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "timestamp(23)", "recursive": false, "isPartOfKey": false }, @@ -3970,14 +3970,14 @@ "isPartOfKey": false }, { - "fieldPath": "urn", + "fieldPath": "version", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "bigint(64)", "recursive": false, "isPartOfKey": false }, @@ -3998,7 +3998,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - 
"runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4014,7 +4014,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4039,7 +4039,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4071,7 +4071,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4095,7 +4095,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4113,7 +4113,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4130,7 +4130,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4146,7 +4146,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4176,19 +4176,19 @@ }, "fields": [ { - "fieldPath": "path", + "fieldPath": "doubleVal", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "double(53)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "longVal", + "fieldPath": "id", 
"nullable": true, "type": { "type": { @@ -4200,7 +4200,7 @@ "isPartOfKey": false }, { - "fieldPath": "stringVal", + "fieldPath": "urn", "nullable": true, "type": { "type": { @@ -4212,43 +4212,43 @@ "isPartOfKey": false }, { - "fieldPath": "doubleVal", + "fieldPath": "aspect", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "double(53)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "id", + "fieldPath": "path", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "bigint(64)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "urn", + "fieldPath": "longVal", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "bigint(64)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "aspect", + "fieldPath": "stringVal", "nullable": true, "type": { "type": { @@ -4264,7 +4264,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4280,7 +4280,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4305,7 +4305,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4337,7 +4337,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": 
"dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4361,7 +4361,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4379,7 +4379,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4396,7 +4396,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4412,7 +4412,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4442,19 +4442,19 @@ }, "fields": [ { - "fieldPath": "id", + "fieldPath": "doubleVal", "nullable": true, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bigint(64)", + "nativeDataType": "double(53)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "urn", + "fieldPath": "path", "nullable": true, "type": { "type": { @@ -4466,7 +4466,7 @@ "isPartOfKey": false }, { - "fieldPath": "path", + "fieldPath": "urn", "nullable": true, "type": { "type": { @@ -4478,14 +4478,14 @@ "isPartOfKey": false }, { - "fieldPath": "doubleVal", + "fieldPath": "id", "nullable": true, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "double(53)", + "nativeDataType": "bigint(64)", "recursive": false, "isPartOfKey": false } @@ -4494,7 +4494,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4510,7 +4510,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - 
"runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4535,7 +4535,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4567,7 +4567,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4591,7 +4591,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4609,7 +4609,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4626,7 +4626,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4642,7 +4642,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4748,7 +4748,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4764,7 +4764,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4789,7 +4789,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", 
"lastRunId": "no-run-id-provided" } }, @@ -4821,7 +4821,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4845,7 +4845,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4863,7 +4863,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4880,7 +4880,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4896,7 +4896,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4966,7 +4966,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -4982,7 +4982,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5007,7 +5007,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5039,7 +5039,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5063,7 +5063,7 @@ }, "systemMetadata": { "lastObserved": 
1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5081,7 +5081,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5098,7 +5098,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5114,7 +5114,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5144,7 +5144,7 @@ }, "fields": [ { - "fieldPath": "E", + "fieldPath": "B", "nullable": true, "type": { "type": { @@ -5156,7 +5156,7 @@ "isPartOfKey": false }, { - "fieldPath": "G", + "fieldPath": "C", "nullable": true, "type": { "type": { @@ -5168,7 +5168,7 @@ "isPartOfKey": false }, { - "fieldPath": "H", + "fieldPath": "D", "nullable": true, "type": { "type": { @@ -5180,7 +5180,7 @@ "isPartOfKey": false }, { - "fieldPath": "I", + "fieldPath": "E", "nullable": true, "type": { "type": { @@ -5192,7 +5192,7 @@ "isPartOfKey": false }, { - "fieldPath": "F", + "fieldPath": "I", "nullable": true, "type": { "type": { @@ -5204,7 +5204,7 @@ "isPartOfKey": false }, { - "fieldPath": "A", + "fieldPath": "H", "nullable": true, "type": { "type": { @@ -5216,7 +5216,7 @@ "isPartOfKey": false }, { - "fieldPath": "B", + "fieldPath": "G", "nullable": true, "type": { "type": { @@ -5228,7 +5228,7 @@ "isPartOfKey": false }, { - "fieldPath": "C", + "fieldPath": "F", "nullable": true, "type": { "type": { @@ -5240,7 +5240,7 @@ "isPartOfKey": false }, { - "fieldPath": "D", + "fieldPath": "A", "nullable": true, "type": { "type": { @@ -5256,7 +5256,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": 
"dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5272,7 +5272,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5297,7 +5297,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5329,7 +5329,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5353,7 +5353,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5371,7 +5371,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5388,7 +5388,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5404,7 +5404,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5434,7 +5434,7 @@ }, "fields": [ { - "fieldPath": "F", + "fieldPath": "A", "nullable": true, "type": { "type": { @@ -5446,7 +5446,7 @@ "isPartOfKey": false }, { - "fieldPath": "G", + "fieldPath": "B", "nullable": true, "type": { "type": { @@ -5458,7 +5458,7 @@ "isPartOfKey": false }, { - "fieldPath": "H", + "fieldPath": "C", "nullable": true, "type": { "type": { @@ -5470,7 +5470,7 @@ 
"isPartOfKey": false }, { - "fieldPath": "I", + "fieldPath": "D", "nullable": true, "type": { "type": { @@ -5482,7 +5482,7 @@ "isPartOfKey": false }, { - "fieldPath": "J", + "fieldPath": "E", "nullable": true, "type": { "type": { @@ -5494,7 +5494,7 @@ "isPartOfKey": false }, { - "fieldPath": "K", + "fieldPath": "F", "nullable": true, "type": { "type": { @@ -5506,7 +5506,7 @@ "isPartOfKey": false }, { - "fieldPath": "L", + "fieldPath": "G", "nullable": true, "type": { "type": { @@ -5518,7 +5518,7 @@ "isPartOfKey": false }, { - "fieldPath": "M", + "fieldPath": "H", "nullable": true, "type": { "type": { @@ -5530,7 +5530,7 @@ "isPartOfKey": false }, { - "fieldPath": "A", + "fieldPath": "I", "nullable": true, "type": { "type": { @@ -5542,7 +5542,7 @@ "isPartOfKey": false }, { - "fieldPath": "B", + "fieldPath": "J", "nullable": true, "type": { "type": { @@ -5554,7 +5554,7 @@ "isPartOfKey": false }, { - "fieldPath": "C", + "fieldPath": "K", "nullable": true, "type": { "type": { @@ -5566,7 +5566,7 @@ "isPartOfKey": false }, { - "fieldPath": "D", + "fieldPath": "L", "nullable": true, "type": { "type": { @@ -5578,7 +5578,7 @@ "isPartOfKey": false }, { - "fieldPath": "E", + "fieldPath": "M", "nullable": true, "type": { "type": { @@ -5594,7 +5594,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5610,7 +5610,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5635,7 +5635,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5671,7 +5671,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + 
"runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5695,7 +5695,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5713,7 +5713,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5730,7 +5730,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5746,7 +5746,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5776,7 +5776,7 @@ }, "fields": [ { - "fieldPath": "DEPARTMENT_ID", + "fieldPath": "MANAGER_ID", "nullable": true, "type": { "type": { @@ -5800,7 +5800,7 @@ "isPartOfKey": false }, { - "fieldPath": "MANAGER_ID", + "fieldPath": "LOCATION_ID", "nullable": true, "type": { "type": { @@ -5812,7 +5812,7 @@ "isPartOfKey": false }, { - "fieldPath": "LOCATION_ID", + "fieldPath": "DEPARTMENT_ID", "nullable": true, "type": { "type": { @@ -5828,7 +5828,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5844,7 +5844,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5869,7 +5869,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ 
-5905,7 +5905,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5929,7 +5929,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5947,7 +5947,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5964,7 +5964,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -5980,7 +5980,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -6010,7 +6010,7 @@ }, "fields": [ { - "fieldPath": "cp_catalog_page_sk", + "fieldPath": "cp_start_date_sk", "nullable": true, "type": { "type": { @@ -6022,26 +6022,26 @@ "isPartOfKey": false }, { - "fieldPath": "cp_catalog_page_id", + "fieldPath": "cp_catalog_page_sk", "nullable": true, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "character varying(65536)", + "nativeDataType": "bigint(64)", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "cp_start_date_sk", + "fieldPath": "cp_catalog_page_id", "nullable": true, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "bigint(64)", + "nativeDataType": "character varying(65536)", "recursive": false, "isPartOfKey": false }, @@ -6122,7 +6122,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": 
"dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -6138,7 +6138,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -6163,7 +6163,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -6207,7 +6207,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -6236,22 +6236,22 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),version)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),urn)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_aspect,PROD),version)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_aspect,PROD),urn)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),createdon)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),createdfor)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_aspect,PROD),createdon)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_aspect,PROD),createdfor)" ], "confidenceScore": 1.0 
}, @@ -6269,11 +6269,11 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),createdfor)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),createdon)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_aspect,PROD),createdfor)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_aspect,PROD),createdon)" ], "confidenceScore": 1.0 }, @@ -6291,11 +6291,11 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),urn)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD),version)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_aspect,PROD),urn)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_aspect,PROD),version)" ], "confidenceScore": 1.0 }, @@ -6315,7 +6315,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -6344,77 +6344,77 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),path)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),doubleVal)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index,PROD),path)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index,PROD),doubleVal)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),longVal)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),id)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index,PROD),longVal)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index,PROD),id)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),stringVal)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),urn)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index,PROD),stringVal)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index,PROD),urn)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),doubleVal)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),aspect)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index,PROD),doubleVal)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index,PROD),aspect)" ], "confidenceScore": 1.0 }, { "upstreamType": 
"FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),path)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index,PROD),id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index,PROD),path)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),urn)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),longVal)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index,PROD),urn)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index,PROD),longVal)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),aspect)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),stringVal)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index,PROD),aspect)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index,PROD),stringVal)" ], "confidenceScore": 1.0 } @@ -6423,7 +6423,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -6452,44 
+6452,44 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),doubleVal)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index_view,PROD),id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index_view,PROD),doubleVal)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),urn)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),path)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index_view,PROD),urn)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index_view,PROD),path)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),path)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),urn)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index_view,PROD),path)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index_view,PROD),urn)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),doubleVal)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),id)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index_view,PROD),doubleVal)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.mysql.metagalaxy.metadata_index_view,PROD),id)" ], "confidenceScore": 1.0 } @@ -6498,7 +6498,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -6595,7 +6595,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -6659,7 +6659,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -6734,7 +6734,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -6763,143 +6763,143 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),F)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),A)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),F)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),A)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),G)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),B)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),G)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),B)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),H)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),C)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),H)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),C)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),I)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio 
University/googleplaystore.csv,PROD),D)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),I)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),D)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),J)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),E)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),J)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),E)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),K)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),F)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),K)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),F)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),L)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),G)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),L)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),G)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),M)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),H)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),M)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),H)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),A)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),I)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),A)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),I)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),B)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),J)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),B)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),J)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),C)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),K)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),C)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),K)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),D)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio 
University/googleplaystore.csv,PROD),L)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),D)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),L)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),E)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/googleplaystore.csv,PROD),M)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),E)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.googleplaystore.csv,PROD),M)" ], "confidenceScore": 1.0 } @@ -6908,7 +6908,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -6937,11 +6937,11 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/oracle-departments.xlsx,PROD),DEPARTMENT_ID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/oracle-departments.xlsx,PROD),MANAGER_ID)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio 
university.oracle-departments.xlsx,PROD),DEPARTMENT_ID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD),MANAGER_ID)" ], "confidenceScore": 1.0 }, @@ -6959,22 +6959,22 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/oracle-departments.xlsx,PROD),MANAGER_ID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/oracle-departments.xlsx,PROD),LOCATION_ID)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD),MANAGER_ID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD),LOCATION_ID)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/oracle-departments.xlsx,PROD),LOCATION_ID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/Dremio University/oracle-departments.xlsx,PROD),DEPARTMENT_ID)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD),LOCATION_ID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.dremio university.oracle-departments.xlsx,PROD),DEPARTMENT_ID)" ], "confidenceScore": 1.0 } @@ -6983,7 +6983,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + 
"runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7012,99 +7012,99 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),E)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),B)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),E)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),B)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),G)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),C)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),G)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),C)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),H)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),D)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),H)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),D)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),I)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),E)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),I)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),E)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),F)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),I)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),F)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),I)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),A)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),H)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),A)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),H)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),B)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),G)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),B)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),G)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),C)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),F)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),C)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),F)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),D)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/NYC-weather.csv,PROD),A)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),D)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.nyc-weather.csv,PROD),A)" ], "confidenceScore": 1.0 } @@ -7113,7 +7113,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7142,33 +7142,33 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_catalog_page_sk)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_start_date_sk)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_catalog_page_sk)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_start_date_sk)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_catalog_page_id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_catalog_page_sk)" ], "downstreamType": "FIELD", "downstreams": [ - 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_catalog_page_id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_catalog_page_sk)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_start_date_sk)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,s3_test_samples./samples.dremio.com/tpcds_sf1000/catalog_page/1ab266d5-18eb-4780-711d-0fa337fa6c00/0_0_0.parquet,PROD),cp_catalog_page_id)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_start_date_sk)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.samples.samples.dremio.com.tpcds_sf1000.catalog_page.1ab266d5-18eb-4780-711d-0fa337fa6c00.0_0_0.parquet,PROD),cp_catalog_page_id)" ], "confidenceScore": 1.0 }, @@ -7243,7 +7243,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7273,7 +7273,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7302,7 +7302,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": 
"dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7325,7 +7325,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7341,7 +7341,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7371,7 +7371,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7400,7 +7400,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7423,7 +7423,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7439,7 +7439,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7469,7 +7469,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7498,7 +7498,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7521,7 +7521,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7537,7 +7537,7 @@ }, 
"systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7567,7 +7567,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7596,7 +7596,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7619,7 +7619,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7635,7 +7635,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7665,7 +7665,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7694,7 +7694,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7717,7 +7717,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7733,7 +7733,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7769,7 +7769,7 @@ "downstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.space.test_folder.raw,PROD),id)" ], - "transformOperation": "COPY: `warehouse`.`id` AS `id`", + "transformOperation": "COPY: \"warehouse\".\"id\" AS \"id\"", "confidenceScore": 0.9, "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Ctest-platform.dremio.space.test_folder.raw%2CPROD%29" }, @@ -7782,7 +7782,7 @@ "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.space.test_folder.raw,PROD),name)" ], - "transformOperation": "COPY: `warehouse`.`name` AS `name`", + "transformOperation": "COPY: \"warehouse\".\"name\" AS \"name\"", "confidenceScore": 0.9, "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Ctest-platform.dremio.space.test_folder.raw%2CPROD%29" }, @@ -7795,7 +7795,7 @@ "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.space.test_folder.raw,PROD),age)" ], - "transformOperation": "COPY: `warehouse`.`age` AS `age`", + "transformOperation": "COPY: \"warehouse\".\"age\" AS \"age\"", "confidenceScore": 0.9, "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Ctest-platform.dremio.space.test_folder.raw%2CPROD%29" }, @@ -7808,7 +7808,7 @@ "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.space.test_folder.raw,PROD),salary)" ], - "transformOperation": "COPY: `warehouse`.`salary` AS `salary`", + "transformOperation": "COPY: \"warehouse\".\"salary\" AS \"salary\"", "confidenceScore": 0.9, "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Ctest-platform.dremio.space.test_folder.raw%2CPROD%29" } @@ -7817,7 +7817,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": 
"no-run-id-provided" } }, @@ -7846,7 +7846,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7861,6 +7861,9 @@ { "entity": "urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.s3.warehouse,PROD)" }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.space.test_folder.raw,PROD)" + }, { "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.s3.warehouse,PROD),age)" }, @@ -7873,9 +7876,6 @@ { "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.s3.warehouse,PROD),salary)" }, - { - "entity": "urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.space.test_folder.raw,PROD)" - }, { "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.space.test_folder.raw,PROD),id)" }, @@ -7893,7 +7893,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7909,7 +7909,105 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.space.warehouse,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1697353200000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.samples.samples.dremio.com.nyc-weather.csv,PROD)", + "type": "VIEW", + "query": 
"urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Ctest-platform.dremio.space.warehouse%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00-9zd01d", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Ctest-platform.dremio.space.warehouse%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "customProperties": {}, + "statement": { + "value": "SELECT\n *\nFROM Samples.\"samples.dremio.com\".\"NYC-weather.csv\"", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1697353200000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00-9zd01d", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Ctest-platform.dremio.space.warehouse%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.samples.samples.dremio.com.nyc-weather.csv,PROD)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:dremio,test-platform.dremio.space.warehouse,PROD)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00-9zd01d", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Ctest-platform.dremio.space.warehouse%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": 
"urn:li:dataPlatform:dremio" + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7925,7 +8023,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7941,7 +8039,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7957,7 +8055,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7973,7 +8071,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -7989,7 +8087,7 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } }, @@ -8005,7 +8103,23 @@ }, "systemMetadata": { "lastObserved": 1697353200000, - "runId": "dremio-2023_10_15-07_00_00-nilnyn", + "runId": "dremio-2023_10_15-07_00_00-9zd01d", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Adremio%2Ctest-platform.dremio.space.warehouse%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1697353200000, + "runId": "dremio-2023_10_15-07_00_00-9zd01d", "lastRunId": "no-run-id-provided" } } diff --git a/metadata-ingestion/tests/unit/api/test_auto_validate_input_fields.py 
b/metadata-ingestion/tests/unit/api/test_auto_validate_input_fields.py new file mode 100644 index 000000000000..736c9e55909d --- /dev/null +++ b/metadata-ingestion/tests/unit/api/test_auto_validate_input_fields.py @@ -0,0 +1,179 @@ +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.auto_work_units.auto_validate_input_fields import ( + ValidateInputFieldsProcessor, +) +from datahub.ingestion.api.source import SourceReport +from datahub.metadata.schema_classes import ( + InputFieldClass, + InputFieldsClass, + NumberTypeClass, + SchemaFieldClass, + SchemaFieldDataTypeClass, +) + +DUMMY_CHART_URN = "urn:li:chart:(grafana,dashboard.123)" +DUMMY_DATASET_URN = "urn:li:dataset:(urn:li:dataPlatform:grafana,dataset,PROD)" + + +def test_valid_input_fields_pass_through(): + """Test that valid input fields pass through unchanged.""" + report = SourceReport() + processor = ValidateInputFieldsProcessor(report) + + # Create input fields with valid fieldPath + input_fields = InputFieldsClass( + fields=[ + InputFieldClass( + schemaField=SchemaFieldClass( + fieldPath="valid_field_1", + type=SchemaFieldDataTypeClass(type=NumberTypeClass()), + nativeDataType="number", + ), + schemaFieldUrn=f"urn:li:schemaField:({DUMMY_DATASET_URN},valid_field_1)", + ), + InputFieldClass( + schemaField=SchemaFieldClass( + fieldPath="valid_field_2", + type=SchemaFieldDataTypeClass(type=NumberTypeClass()), + nativeDataType="number", + ), + schemaFieldUrn=f"urn:li:schemaField:({DUMMY_DATASET_URN},valid_field_2)", + ), + ] + ) + + mcpw = MetadataChangeProposalWrapper( + entityUrn=DUMMY_CHART_URN, + aspect=input_fields, + ) + + out = list(processor.validate_input_fields([mcpw.as_workunit()])) + + assert len(out) == 1 + result_aspect = out[0].get_aspect_of_type(InputFieldsClass) + assert result_aspect is not None + assert len(result_aspect.fields) == 2 + assert result_aspect.fields[0].schemaField is not None + assert result_aspect.fields[0].schemaField.fieldPath == 
"valid_field_1" + assert result_aspect.fields[1].schemaField is not None + assert result_aspect.fields[1].schemaField.fieldPath == "valid_field_2" + assert len(report.warnings) == 0 + + +def test_empty_field_paths_filtered(): + """Test that input fields with empty fieldPath values are filtered out.""" + report = SourceReport() + processor = ValidateInputFieldsProcessor(report) + + # Create mix of valid and invalid input fields + input_fields = InputFieldsClass( + fields=[ + InputFieldClass( + schemaField=SchemaFieldClass( + fieldPath="valid_field", + type=SchemaFieldDataTypeClass(type=NumberTypeClass()), + nativeDataType="number", + ), + schemaFieldUrn=f"urn:li:schemaField:({DUMMY_DATASET_URN},valid_field)", + ), + InputFieldClass( + schemaField=SchemaFieldClass( + fieldPath="", # Empty fieldPath + type=SchemaFieldDataTypeClass(type=NumberTypeClass()), + nativeDataType="number", + ), + schemaFieldUrn=f"urn:li:schemaField:({DUMMY_DATASET_URN},)", + ), + InputFieldClass( + schemaField=SchemaFieldClass( + fieldPath=" ", # Whitespace-only fieldPath + type=SchemaFieldDataTypeClass(type=NumberTypeClass()), + nativeDataType="number", + ), + schemaFieldUrn=f"urn:li:schemaField:({DUMMY_DATASET_URN}, )", + ), + ] + ) + + mcpw = MetadataChangeProposalWrapper( + entityUrn=DUMMY_CHART_URN, + aspect=input_fields, + ) + + out = list(processor.validate_input_fields([mcpw.as_workunit()])) + + assert len(out) == 1 + result_aspect = out[0].get_aspect_of_type(InputFieldsClass) + assert result_aspect is not None + assert len(result_aspect.fields) == 1 + assert result_aspect.fields[0].schemaField is not None + assert result_aspect.fields[0].schemaField.fieldPath == "valid_field" + + # Verify warning was reported + assert len(report.warnings) == 1 + assert "Invalid input fields filtered" in str(report.warnings) + # Verify counter was incremented + assert report.num_input_fields_filtered == 2 + + +def test_all_invalid_fields_skips_workunit(): + """Test that when all fields are invalid, 
the workunit is not yielded.""" + report = SourceReport() + processor = ValidateInputFieldsProcessor(report) + + # Create only invalid input fields + input_fields = InputFieldsClass( + fields=[ + InputFieldClass( + schemaField=SchemaFieldClass( + fieldPath="", # Empty fieldPath + type=SchemaFieldDataTypeClass(type=NumberTypeClass()), + nativeDataType="number", + ), + schemaFieldUrn=f"urn:li:schemaField:({DUMMY_DATASET_URN},)", + ), + InputFieldClass( + schemaField=SchemaFieldClass( + fieldPath=" ", # Whitespace-only fieldPath + type=SchemaFieldDataTypeClass(type=NumberTypeClass()), + nativeDataType="number", + ), + schemaFieldUrn=f"urn:li:schemaField:({DUMMY_DATASET_URN}, )", + ), + ] + ) + + mcpw = MetadataChangeProposalWrapper( + entityUrn=DUMMY_CHART_URN, + aspect=input_fields, + ) + + out = list(processor.validate_input_fields([mcpw.as_workunit()])) + + # The workunit should not be yielded at all + assert len(out) == 0 + + # Verify warning was reported + assert len(report.warnings) == 1 + # Verify counter was incremented + assert report.num_input_fields_filtered == 2 + + +def test_no_input_fields_aspect(): + """Test that workunits without InputFieldsClass pass through unchanged.""" + report = SourceReport() + processor = ValidateInputFieldsProcessor(report) + + # Create workunit without InputFieldsClass + from datahub.metadata.schema_classes import StatusClass + + mcpw = MetadataChangeProposalWrapper( + entityUrn=DUMMY_CHART_URN, + aspect=StatusClass(removed=False), + ) + + out = list(processor.validate_input_fields([mcpw.as_workunit()])) + + assert len(out) == 1 + assert out[0].get_aspect_of_type(InputFieldsClass) is None + assert len(report.warnings) == 0 diff --git a/metadata-ingestion/tests/unit/reporting/test_ingestion_stage.py b/metadata-ingestion/tests/unit/reporting/test_ingestion_stage.py index 4a8dbafae754..694044caef0a 100644 --- a/metadata-ingestion/tests/unit/reporting/test_ingestion_stage.py +++ 
b/metadata-ingestion/tests/unit/reporting/test_ingestion_stage.py @@ -14,8 +14,8 @@ def test_ingestion_stage_context_records_duration(): pass assert len(report.ingestion_stage_durations) == 1 key = next(iter(report.ingestion_stage_durations.keys())) - assert key[0] == IngestionHighStage._UNDEFINED - assert "Test Stage" in key[1] + assert "Ingestion" in key + assert "Test Stage" in key def test_ingestion_stage_context_handles_exceptions(): @@ -27,7 +27,8 @@ def test_ingestion_stage_context_handles_exceptions(): pass assert len(report.ingestion_stage_durations) == 1 key = next(iter(report.ingestion_stage_durations.keys())) - assert "Test Stage" in key[1] + assert "Ingestion" in key + assert "Test Stage" in key def test_ingestion_stage_context_report_handles_multiple_stages(): @@ -45,9 +46,9 @@ def test_ingestion_stage_context_report_handles_multiple_stages(): ) sorted_stages = list(sorted(report.ingestion_stage_durations.keys())) - assert "Test Stage 1" in sorted_stages[0][1] - assert "Test Stage 2" in sorted_stages[1][1] - assert "Test Stage 3" in sorted_stages[2][1] + assert "Test Stage 1" in sorted_stages[0] + assert "Test Stage 2" in sorted_stages[1] + assert "Test Stage 3" in sorted_stages[2] def test_ingestion_stage_context_report_handles_nested_stages(): @@ -64,14 +65,14 @@ def test_ingestion_stage_context_report_handles_nested_stages(): for duration in report.ingestion_stage_durations.values() ) sorted_stages = list(sorted(report.ingestion_stage_durations.keys())) - assert "Inner1" in sorted_stages[0][1] - assert "Inner2" in sorted_stages[1][1] - assert "Outer" in sorted_stages[2][1] + assert "Inner1" in sorted_stages[0] + assert "Inner2" in sorted_stages[1] + assert "Outer" in sorted_stages[2] # Check that outer stage duration >= sum of inner stage durations - outer_key = [k for k in report.ingestion_stage_durations if "Outer" in k[1]][0] - inner1_key = [k for k in report.ingestion_stage_durations if "Inner1" in k[1]][0] - inner2_key = [k for k in 
report.ingestion_stage_durations if "Inner2" in k[1]][0] + outer_key = [k for k in report.ingestion_stage_durations if "Outer" in k][0] + inner1_key = [k for k in report.ingestion_stage_durations if "Inner1" in k][0] + inner2_key = [k for k in report.ingestion_stage_durations if "Inner2" in k][0] outer_duration = report.ingestion_stage_durations[outer_key] inner1_duration = report.ingestion_stage_durations[inner1_key] @@ -96,6 +97,6 @@ def test_ingestion_stage_with_high_stage(): time.sleep(0.1) assert len(report.ingestion_stage_durations) == 1 key = next(iter(report.ingestion_stage_durations.keys())) - assert key[0] == IngestionHighStage.PROFILING - assert "Test Stage" in key[1] + assert "Profiling" in key + assert "Test Stage" in key assert report.ingestion_high_stage_seconds[IngestionHighStage.PROFILING] > 0 diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_dremio_quoted_identifiers.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_dremio_quoted_identifiers.json new file mode 100644 index 000000000000..50ce6c8326c9 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_dremio_quoted_identifiers.json @@ -0,0 +1,75 @@ +{ + "query_type": "SELECT", + "query_type_props": {}, + "query_fingerprint": "51225b224793d6d8f175d540854116eae88bc3897f5d5f62419f9cd425e1dd15", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:dremio,mysource.sales.customers,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:dremio,mysource.sales.orders,PROD)" + ], + "out_tables": [], + "column_lineage": [ + { + "downstream": { + "table": null, + "column": "order_id", + "column_type": null, + "native_column_type": null + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:dremio,mysource.sales.orders,PROD)", + "column": "order_id" + } + ], + "logic": { + "is_direct_copy": true, + "column_logic": "\"orders\".\"order_id\" AS \"order_id\"" + } + }, + { + "downstream": { + "table": null, + "column": "customer_name", + 
"column_type": null, + "native_column_type": null + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:dremio,mysource.sales.customers,PROD)", + "column": "customer_name" + } + ], + "logic": { + "is_direct_copy": true, + "column_logic": "\"customers\".\"customer_name\" AS \"customer_name\"" + } + } + ], + "joins": [ + { + "join_type": "JOIN", + "left_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:dremio,mysource.sales.orders,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:dremio,mysource.sales.customers,PROD)" + ], + "right_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:dremio,mysource.sales.customers,PROD)" + ], + "on_clause": "\"cte_orders\".\"customer_id\" = \"customers\".\"customer_id\"", + "columns_involved": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:dremio,mysource.sales.customers,PROD)", + "column": "customer_id" + }, + { + "table": "urn:li:dataset:(urn:li:dataPlatform:dremio,mysource.sales.orders,PROD)", + "column": "customer_id" + } + ] + } + ], + "debug_info": { + "confidence": 0.2, + "generalized_statement": "WITH \"cte_orders\" AS (SELECT * FROM \"MySource\".\"sales\".\"orders\" WHERE \"status\" = ?) SELECT \"cte_orders\".\"order_id\", \"customers\".\"customer_name\" FROM \"cte_orders\" JOIN \"MySource\".\"sales\".\"customers\" ON \"cte_orders\".\"customer_id\" = \"customers\".\"customer_id\"" + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index ce97ecf13f22..98415991bd50 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -1574,3 +1574,22 @@ def test_natural_join() -> None: }, expected_file=RESOURCE_DIR / "test_natural_join.json", ) + + +def test_dremio_quoted_identifiers() -> None: + # Test that Dremio SQL with quoted identifiers parses correctly. 
+ # This is a regression test for the issue where Dremio was mapped to the + # "drill" dialect, which didn't support quoted identifiers properly. + assert_sql_result( + """\ +WITH "cte_orders" AS ( + SELECT * FROM "MySource"."sales"."orders" + WHERE "status" = 'completed' +) +SELECT "cte_orders"."order_id", "customers"."customer_name" +FROM "cte_orders" +JOIN "MySource"."sales"."customers" ON "cte_orders"."customer_id" = "customers"."customer_id" +""", + dialect="dremio", + expected_file=RESOURCE_DIR / "test_dremio_quoted_identifiers.json", + ) diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index 539e95728fc8..4492573826c9 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -127,6 +127,9 @@ dependencies { implementation(externalDependency.grpcProtobuf) { because("CVE-2023-1428, CVE-2023-32731") } + implementation(externalDependency.grpcNettyShaded) { + because("Security vulnerability in grpc-netty-shaded 1.68.3") + } } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/aspect/validation/PolicyFieldTypeValidator.java b/metadata-io/src/main/java/com/linkedin/metadata/aspect/validation/PolicyFieldTypeValidator.java new file mode 100644 index 000000000000..6dd731022dc0 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/aspect/validation/PolicyFieldTypeValidator.java @@ -0,0 +1,86 @@ +package com.linkedin.metadata.aspect.validation; + +import com.datahub.authorization.EntityFieldType; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; +import 
com.linkedin.policy.DataHubPolicyInfo; +import com.linkedin.policy.PolicyMatchCriterion; +import com.linkedin.policy.PolicyMatchFilter; +import java.util.Arrays; +import java.util.Collection; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; + +/** + * Validator to ensure that policy filter field types and privilege constraint field types are valid + * EntityFieldType enum values. + */ +@Slf4j +@Setter +@Getter +@Accessors(chain = true) +public class PolicyFieldTypeValidator extends AspectPayloadValidator { + @Nonnull private AspectPluginConfig config; + + private static final Set VALID_ENTITY_FIELD_TYPES = + Arrays.stream(EntityFieldType.values()) + .map(EntityFieldType::name) + .collect(Collectors.toSet()); + + @Override + protected Stream validateProposedAspects( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + + ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); + + mcpItems.forEach( + item -> { + DataHubPolicyInfo policyInfo = item.getAspect(DataHubPolicyInfo.class); + if (policyInfo != null && policyInfo.hasResources()) { + if (policyInfo.getResources().hasFilter()) { + validateFilter(item, policyInfo.getResources().getFilter(), exceptions); + } + if (policyInfo.getResources().hasPrivilegeConstraints()) { + validateFilter(item, policyInfo.getResources().getPrivilegeConstraints(), exceptions); + } + } + }); + + return exceptions.streamAllExceptions(); + } + + private void validateFilter( + BatchItem item, PolicyMatchFilter filter, ValidationExceptionCollection exceptions) { + if (filter != null && filter.hasCriteria()) { + for (PolicyMatchCriterion criterion : filter.getCriteria()) { + String field = criterion.getField(); + if (!VALID_ENTITY_FIELD_TYPES.contains(field)) { + 
exceptions.addException( + AspectValidationException.forItem( + item, + String.format( + "Invalid field type '%s'. Must be one of: %s", + field, String.join(", ", VALID_ENTITY_FIELD_TYPES)))); + } + } + } + } + + @Override + protected Stream validatePreCommitAspects( + @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { + return Stream.empty(); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java index b8dc6c314d4c..7182f478a27b 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java @@ -561,6 +561,21 @@ public Integer countAspect(@Nonnull String aspectName, @Nullable String urnLike) @Nonnull @Override public PartitionedStream streamAspectBatches(final RestoreIndicesArgs args) { + // Use default for existing RestoreIndices operations + return streamAspectBatches(args, null); + } + + /** + * Stream aspects ordered by URN/aspect for optimal Elasticsearch document batching. 
Supports + * configurable transaction isolation level to optimize for different use cases: - LoadIndices can + * use READ_UNCOMMITTED for faster scanning + * + * @param args Stream arguments and filters + * @param isolationLevel Optional isolation level override (null = database default) + * @return PartitionedStream of aspects ordered by URN/aspect + */ + public PartitionedStream streamAspectBatches( + final RestoreIndicesArgs args, final TxIsolation isolationLevel) { ExpressionList exp = server .find(EbeanAspectV2.class) @@ -612,15 +627,32 @@ public PartitionedStream streamAspectBatches(final RestoreIndices exp = exp.setMaxRows(args.limit); } - return PartitionedStream.builder() - .delegateStream( + // Execute with specific transaction isolation level + Stream stream; + if (isolationLevel == TxIsolation.READ_UNCOMMITTED) { + // Use explicit transaction scope for READ_UNCOMMITTED to override default + try (Transaction transaction = + server.beginTransaction(TxScope.requiresNew().setIsolation(isolationLevel))) { + stream = exp.orderBy() .asc(EbeanAspectV2.URN_COLUMN) .orderBy() .asc(EbeanAspectV2.ASPECT_COLUMN) .setFirstRow(start) - .findStream()) - .build(); + .findStream(); // Transaction auto-closes when stream completes + } + } else { + // For READ_COMMITTED and other levels, use standard approach + stream = + exp.orderBy() + .asc(EbeanAspectV2.URN_COLUMN) + .orderBy() + .asc(EbeanAspectV2.ASPECT_COLUMN) + .setFirstRow(start) + .findStream(); + } + + return PartitionedStream.builder().delegateStream(stream).build(); } /** diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java index 75060de0db23..ce1597d82e3e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java @@ -17,10 +17,11 @@ import 
org.apache.commons.lang3.NotImplementedException; import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchResponse; +import org.springframework.beans.factory.DisposableBean; /** A search DAO for Elasticsearch backend. */ @Slf4j -public class ESGraphQueryDAO implements GraphQueryDAO { +public class ESGraphQueryDAO implements GraphQueryDAO, DisposableBean { private final GraphQueryBaseDAO delegate; @Getter private final GraphServiceConfiguration graphServiceConfig; @@ -97,4 +98,12 @@ public SearchResponse getSearchResponse( SearchResponse executeSearch(@Nonnull SearchRequest searchRequest) { return delegate.executeSearch(searchRequest); } + + @Override + public void destroy() throws Exception { + // Shutdown the delegate if it's a GraphQueryPITDAO + if (delegate instanceof GraphQueryPITDAO) { + ((GraphQueryPITDAO) delegate).shutdown(); + } + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphQueryBaseDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphQueryBaseDAO.java index 4ed6242b4c8e..665d444c6b0e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphQueryBaseDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphQueryBaseDAO.java @@ -1490,7 +1490,7 @@ private List scrollLineageSearchWithMaxRelations( Set entityUrns) { int defaultPageSize = graphServiceConfig.getLimit().getResults().getApiDefault(); - int slices = config.getSearch().getGraph().getImpact().getSlices(); + int slices = Math.max(2, config.getSearch().getGraph().getImpact().getSlices()); return searchWithSlices( opContext, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphQueryPITDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphQueryPITDAO.java index 499d3d0adf97..f5a3b51927a9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphQueryPITDAO.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphQueryPITDAO.java @@ -19,6 +19,10 @@ import java.util.List; import java.util.Set; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; import javax.annotation.Nonnull; import lombok.Getter; import lombok.extern.slf4j.Slf4j; @@ -36,6 +40,8 @@ public class GraphQueryPITDAO extends GraphQueryBaseDAO { @Getter private final SearchClientShim client; + final ExecutorService pitExecutor; + public GraphQueryPITDAO( SearchClientShim client, GraphServiceConfiguration graphServiceConfig, @@ -43,6 +49,46 @@ public GraphQueryPITDAO( MetricUtils metricUtils) { super(graphServiceConfig, config, metricUtils); this.client = client; + + // Create dedicated thread pool for PIT operations + int maxThreads = config.getSearch().getGraph().getMaxThreads(); + this.pitExecutor = + new ThreadPoolExecutor( + maxThreads, // core pool size + maxThreads, // maximum pool size + 60L, + TimeUnit.SECONDS, // keep alive time + new LinkedBlockingQueue<>(maxThreads), // bounded queue for backpressure + r -> { + Thread t = new Thread(r, "pit-worker-" + System.currentTimeMillis()); + t.setDaemon(true); + return t; + }, + new ThreadPoolExecutor.CallerRunsPolicy() // backpressure: caller runs when queue full + ); + + log.info("Initialized PIT thread pool with {} threads and bounded queue", maxThreads); + } + + /** Shutdown the PIT executor service gracefully. 
*/ + public void shutdown() { + if (pitExecutor != null && !pitExecutor.isShutdown()) { + log.info("Shutting down PIT thread pool"); + pitExecutor.shutdown(); + try { + if (!pitExecutor.awaitTermination(30, TimeUnit.SECONDS)) { + log.warn("PIT thread pool did not terminate gracefully, forcing shutdown"); + pitExecutor.shutdownNow(); + if (!pitExecutor.awaitTermination(10, TimeUnit.SECONDS)) { + log.error("PIT thread pool did not terminate after forced shutdown"); + } + } + } catch (InterruptedException e) { + log.warn("Interrupted while waiting for PIT thread pool shutdown", e); + pitExecutor.shutdownNow(); + Thread.currentThread().interrupt(); + } + } } /** @@ -73,6 +119,7 @@ protected List searchWithSlices( for (int sliceId = 0; sliceId < slices; sliceId++) { final int currentSliceId = sliceId; + CompletableFuture> sliceFuture = CompletableFuture.supplyAsync( () -> { @@ -91,7 +138,8 @@ protected List searchWithSlices( slices, remainingTime, entityUrns); - }); + }, + pitExecutor); // Use dedicated thread pool with CallerRunsPolicy for backpressure sliceFutures.add(sliceFuture); } @@ -135,6 +183,12 @@ private List searchSingleSliceWithPit( opContext.getSearchContext().getIndexConvention().getIndexName(INDEX_NAME)); while (sliceRelationships.size() < maxRelations) { + // Check for thread interruption (from future.cancel(true)) + if (Thread.currentThread().isInterrupted()) { + log.warn("Slice {} was interrupted, cleaning up PIT and stopping", sliceId); + throw new RuntimeException("Slice " + sliceId + " was interrupted"); + } + // Check timeout before processing if (remainingTime <= 0) { log.warn("Slice {} timed out, stopping PIT search", sliceId); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java index f6a1cacbf43c..615c1a2ce54f 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java @@ -65,7 +65,7 @@ public class ElasticSearchService implements EntitySearchService, ElasticSearchI @VisibleForTesting @Getter private final ESSearchDAO esSearchDAO; private final ESBrowseDAO esBrowseDAO; - private final ESWriteDAO esWriteDAO; + @Getter private final ESWriteDAO esWriteDAO; @Override public void reindexAll(Collection> properties) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/client/shim/impl/AbstractBulkProcessorShim.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/client/shim/impl/AbstractBulkProcessorShim.java new file mode 100644 index 000000000000..7e6353ef06f4 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/client/shim/impl/AbstractBulkProcessorShim.java @@ -0,0 +1,89 @@ +package com.linkedin.metadata.search.elasticsearch.client.shim.impl; + +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; +import lombok.extern.slf4j.Slf4j; +import org.opensearch.action.DocWriteRequest; + +/** + * Abstract base class that provides common bulk processor functionality for search client shims. + * This class handles the common patterns of managing multiple bulk processors with round-robin + * distribution and URN-based consistent hashing. + */ +@Slf4j +public abstract class AbstractBulkProcessorShim { + + protected int threadCount = 1; + protected AtomicInteger roundRobinCounter; + protected T[] bulkProcessors; + + /** + * Initialize bulk processor infrastructure with common fields and build the processor array. + * Subclasses should call this method with their processor supplier. 
+ */ + protected void initBulkProcessors(int threadCount, Supplier processorSupplier) { + this.threadCount = threadCount; + this.roundRobinCounter = new AtomicInteger(0); + + @SuppressWarnings("unchecked") + T[] processors = (T[]) new Object[threadCount]; + for (int i = 0; i < threadCount; i++) { + processors[i] = processorSupplier.get(); + } + this.bulkProcessors = processors; + } + + /** + * Add a write request using round-robin distribution across processors. Subclasses must implement + * the actual processor-specific add logic. + */ + public void addBulk(DocWriteRequest writeRequest) { + int index = roundRobinCounter.getAndIncrement() % threadCount; + addToProcessor(bulkProcessors[index], writeRequest); + } + + /** + * Add a write request using URN-based consistent hashing for entity document consistency. + * Subclasses must implement the actual processor-specific add logic. + */ + public void addBulk(String urn, DocWriteRequest writeRequest) { + int index = Math.abs(urn.hashCode()) % threadCount; + addToProcessor(bulkProcessors[index], writeRequest); + } + + /** + * Flush all bulk processors. Subclasses must implement the actual processor-specific flush logic. + */ + public void flushBulkProcessor() { + for (T processor : bulkProcessors) { + flushProcessor(processor); + } + } + + /** + * Close all bulk processors. Subclasses must implement the actual processor-specific close logic. + */ + public void closeBulkProcessor() { + for (T processor : bulkProcessors) { + closeProcessor(processor); + } + } + + /** + * Add a write request to a specific processor. Subclasses must implement this method to handle + * the specific processor type. + */ + protected abstract void addToProcessor(T processor, DocWriteRequest writeRequest); + + /** + * Flush a specific processor. Subclasses must implement this method to handle the specific + * processor type. + */ + protected abstract void flushProcessor(T processor); + + /** + * Close a specific processor. 
Subclasses must implement this method to handle the specific + * processor type. + */ + protected abstract void closeProcessor(T processor); +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/client/shim/impl/Es8SearchClientShim.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/client/shim/impl/Es8SearchClientShim.java index 6220f766e480..39c07f7fe68e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/client/shim/impl/Es8SearchClientShim.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/client/shim/impl/Es8SearchClientShim.java @@ -98,6 +98,7 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -112,9 +113,22 @@ import org.apache.http.auth.AuthScope; import org.apache.http.auth.UsernamePasswordCredentials; import org.apache.http.client.CredentialsProvider; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.conn.ssl.DefaultHostnameVerifier; import org.apache.http.conn.ssl.NoopHostnameVerifier; +import org.apache.http.conn.util.PublicSuffixMatcherLoader; import org.apache.http.impl.client.BasicCredentialsProvider; import org.apache.http.impl.nio.client.HttpAsyncClientBuilder; +import org.apache.http.impl.nio.conn.PoolingNHttpClientConnectionManager; +import org.apache.http.impl.nio.reactor.DefaultConnectingIOReactor; +import org.apache.http.impl.nio.reactor.IOReactorConfig; +import org.apache.http.nio.conn.NHttpClientConnectionManager; +import org.apache.http.nio.conn.NoopIOSessionStrategy; +import org.apache.http.nio.conn.SchemeIOSessionStrategy; +import org.apache.http.nio.conn.ssl.SSLIOSessionStrategy; +import org.apache.http.nio.reactor.IOReactorException; +import org.apache.http.nio.reactor.IOReactorExceptionHandler; +import 
org.apache.http.ssl.SSLContexts; import org.elasticsearch.client.RestClient; import org.elasticsearch.client.RestClientBuilder; import org.opensearch.action.DocWriteRequest; @@ -212,13 +226,13 @@ * this, since we always use the default this is likely not a concern, but something to keep in mind */ @Slf4j -public class Es8SearchClientShim implements ElasticSearchClientShim { +public class Es8SearchClientShim extends AbstractBulkProcessorShim> + implements ElasticSearchClientShim { @Getter private final ShimConfiguration shimConfiguration; private final SearchEngineType engineType; private final ElasticsearchClient client; private final ObjectMapper objectMapper; - private BulkIngester bulkProcessor; private final JacksonJsonpMapper jacksonJsonpMapper; static { @@ -261,6 +275,20 @@ private ElasticsearchClient createEs8Client(ShimConfiguration config) throws IOE } } + // Connection manager configuration + try { + httpAsyncClientBuilder.setConnectionManager(createConnectionManager(config)); + } catch (IOReactorException e) { + throw new IllegalStateException( + "Unable to start ElasticSearch client. Please verify connection configuration."); + } + + // IO Reactor configuration + log.info( + "Configuring Elasticsearch client with threadCount: {}", config.getThreadCount()); + httpAsyncClientBuilder.setDefaultIOReactorConfig( + IOReactorConfig.custom().setIoThreadCount(config.getThreadCount()).build()); + // Authentication configureAuthentication(httpAsyncClientBuilder, config); @@ -290,6 +318,58 @@ private RestClientBuilder createRestClientBuilder(ShimConfiguration config) { return builder; } + /** + * Create connection manager with proper thread count and connection pool configuration. This + * ensures optimal performance by matching connection pool size to thread count. 
+ */ + private NHttpClientConnectionManager createConnectionManager(ShimConfiguration config) + throws IOReactorException { + SSLContext sslContext = SSLContexts.createDefault(); + javax.net.ssl.HostnameVerifier hostnameVerifier = + new DefaultHostnameVerifier(PublicSuffixMatcherLoader.getDefault()); + SchemeIOSessionStrategy sslStrategy = + new SSLIOSessionStrategy(sslContext, null, null, hostnameVerifier); + + log.info("Creating IOReactorConfig with threadCount: {}", config.getThreadCount()); + IOReactorConfig ioReactorConfig = + IOReactorConfig.custom().setIoThreadCount(config.getThreadCount()).build(); + DefaultConnectingIOReactor ioReactor = new DefaultConnectingIOReactor(ioReactorConfig); + IOReactorExceptionHandler ioReactorExceptionHandler = + new IOReactorExceptionHandler() { + @Override + public boolean handle(java.io.IOException ex) { + log.error("IO Exception caught during ElasticSearch connection.", ex); + return true; + } + + @Override + public boolean handle(RuntimeException ex) { + log.error("Runtime Exception caught during ElasticSearch connection.", ex); + return true; + } + }; + ioReactor.setExceptionHandler(ioReactorExceptionHandler); + + PoolingNHttpClientConnectionManager connectionManager = + new PoolingNHttpClientConnectionManager( + ioReactor, + RegistryBuilder.create() + .register("http", NoopIOSessionStrategy.INSTANCE) + .register("https", sslStrategy) + .build()); + + // Set maxConnectionsPerRoute to match threadCount (minimum 2) + int maxConnectionsPerRoute = Math.max(2, config.getThreadCount()); + connectionManager.setDefaultMaxPerRoute(maxConnectionsPerRoute); + + log.info( + "Configured connection pool: maxPerRoute={} (threadCount={})", + maxConnectionsPerRoute, + config.getThreadCount()); + + return connectionManager; + } + private void configureAuthentication( HttpAsyncClientBuilder httpAsyncClientBuilder, ShimConfiguration config) { // Basic authentication @@ -1497,31 +1577,42 @@ public void generateAsyncBulkProcessor( int 
bulkRequestsLimit, long bulkFlushPeriod, long retryInterval, - int numRetries) { - co.elastic.clients.elasticsearch._helpers.bulk.BulkListener esBulkListener = - new Es8BulkListener(metricUtils); - BulkIngester.Builder builder = - new BulkIngester.Builder<>() - .client(client) - .flushInterval(bulkFlushPeriod, TimeUnit.SECONDS) - .maxOperations(bulkRequestsLimit) - .listener(esBulkListener); - final Refresh refresh; - switch (writeRequestRefreshPolicy) { - case NONE: - refresh = Refresh.False; - break; - case IMMEDIATE: - refresh = Refresh.True; - break; - case WAIT_UNTIL: - refresh = Refresh.WaitFor; - break; - default: - refresh = null; - } - builder.globalSettings(new BulkRequest.Builder().refresh(refresh)); - this.bulkProcessor = builder.build(); + int numRetries, + int threadCount) { + Supplier> processorSupplier = + () -> { + co.elastic.clients.elasticsearch._helpers.bulk.BulkListener esBulkListener = + new Es8BulkListener(metricUtils); + + final Refresh refresh; + switch (writeRequestRefreshPolicy) { + case NONE: + refresh = Refresh.False; + break; + case IMMEDIATE: + refresh = Refresh.True; + break; + case WAIT_UNTIL: + refresh = Refresh.WaitFor; + break; + default: + refresh = null; + } + + BulkIngester.Builder builder = + new BulkIngester.Builder<>() + .client(client) + .flushInterval(bulkFlushPeriod, TimeUnit.SECONDS) + .maxOperations(bulkRequestsLimit) + .listener(esBulkListener); + + builder.globalSettings(new BulkRequest.Builder().refresh(refresh)); + return builder.build(); + }; + + initBulkProcessors(threadCount, processorSupplier); + + log.info("Initialized {} async bulk processors for parallel execution", threadCount); } @Override @@ -1531,18 +1622,21 @@ public void generateBulkProcessor( int bulkRequestsLimit, long bulkFlushPeriod, long retryInterval, - int numRetries) { + int numRetries, + int threadCount) { + // ES8 uses async processors for both sync and async operations generateAsyncBulkProcessor( writeRequestRefreshPolicy, metricUtils, 
bulkRequestsLimit, bulkFlushPeriod, retryInterval, - numRetries); + numRetries, + threadCount); } @Override - public void addBulk(DocWriteRequest writeRequest) { + protected void addToProcessor(BulkIngester processor, DocWriteRequest writeRequest) { BulkOperation operation; if (writeRequest instanceof UpdateRequest) { UpdateRequest update = (UpdateRequest) writeRequest; @@ -1596,17 +1690,17 @@ public void addBulk(DocWriteRequest writeRequest) { .v2()) .build()); } - bulkProcessor.add(operation); + processor.add(operation); } @Override - public void flushBulkProcessor() { - bulkProcessor.flush(); + protected void flushProcessor(BulkIngester processor) { + processor.flush(); } @Override - public void closeBulkProcessor() { - bulkProcessor.close(); + protected void closeProcessor(BulkIngester processor) { + processor.close(); } @Override diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/client/shim/impl/OpenSearch2SearchClientShim.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/client/shim/impl/OpenSearch2SearchClientShim.java index b28286d56a3b..caece14a171d 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/client/shim/impl/OpenSearch2SearchClientShim.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/client/shim/impl/OpenSearch2SearchClientShim.java @@ -8,6 +8,7 @@ import java.util.HashMap; import java.util.Map; import java.util.Optional; +import java.util.function.Supplier; import javax.annotation.Nonnull; import javax.net.ssl.HostnameVerifier; import javax.net.ssl.SSLContext; @@ -112,12 +113,12 @@ * implementation is very similar to Es7CompatibilitySearchClientShim. 
*/ @Slf4j -public class OpenSearch2SearchClientShim implements OpenSearchClientShim { +public class OpenSearch2SearchClientShim extends AbstractBulkProcessorShim + implements OpenSearchClientShim { @Getter private final ShimConfiguration shimConfiguration; private final RestHighLevelClient client; protected SearchEngineType engineType; - private BulkProcessor bulkProcessor; public OpenSearch2SearchClientShim(@Nonnull ShimConfiguration config) throws IOException { this.shimConfiguration = config; @@ -192,6 +193,7 @@ private NHttpClientConnectionManager createConnectionManager() throws IOReactorE SchemeIOSessionStrategy sslStrategy = new SSLIOSessionStrategy(sslContext, null, null, hostnameVerifier); + log.info("Creating IOReactorConfig with threadCount: {}", shimConfiguration.getThreadCount()); IOReactorConfig ioReactorConfig = IOReactorConfig.custom().setIoThreadCount(shimConfiguration.getThreadCount()).build(); DefaultConnectingIOReactor ioReactor = new DefaultConnectingIOReactor(ioReactorConfig); @@ -211,12 +213,24 @@ public boolean handle(RuntimeException ex) { }; ioReactor.setExceptionHandler(ioReactorExceptionHandler); - return new PoolingNHttpClientConnectionManager( - ioReactor, - RegistryBuilder.create() - .register("http", NoopIOSessionStrategy.INSTANCE) - .register("https", sslStrategy) - .build()); + PoolingNHttpClientConnectionManager connectionManager = + new PoolingNHttpClientConnectionManager( + ioReactor, + RegistryBuilder.create() + .register("http", NoopIOSessionStrategy.INSTANCE) + .register("https", sslStrategy) + .build()); + + // Set maxConnectionsPerRoute to match threadCount (minimum 2) + int maxConnectionsPerRoute = Math.max(2, shimConfiguration.getThreadCount()); + connectionManager.setDefaultMaxPerRoute(maxConnectionsPerRoute); + + log.info( + "Configured connection pool: maxPerRoute={} (threadCount={})", + maxConnectionsPerRoute, + shimConfiguration.getThreadCount()); + + return connectionManager; } private void 
setCredentials(HttpAsyncClientBuilder httpAsyncClientBuilder) { @@ -577,22 +591,25 @@ public void generateAsyncBulkProcessor( int bulkRequestsLimit, long bulkFlushPeriod, long retryInterval, - int numRetries) { - bulkProcessor = - BulkProcessor.builder( - (request, bulkListener) -> { - client.bulkAsync(request, RequestOptions.DEFAULT, bulkListener); - }, - BulkListener.getInstance(writeRequestRefreshPolicy, metricUtils)) - .setBulkActions(bulkRequestsLimit) - .setFlushInterval(TimeValue.timeValueSeconds(bulkFlushPeriod)) - // This retry is ONLY for "resource constraints", i.e. 429 errors (each request has - // other - // retry methods) - .setBackoffPolicy( - BackoffPolicy.constantBackoff( - TimeValue.timeValueSeconds(retryInterval), numRetries)) - .build(); + int numRetries, + int threadCount) { + Supplier processorSupplier = + () -> + BulkProcessor.builder( + (request, bulkListener) -> { + client.bulkAsync(request, RequestOptions.DEFAULT, bulkListener); + }, + BulkListener.getInstance(0, writeRequestRefreshPolicy, metricUtils)) + .setBulkActions(bulkRequestsLimit) + .setFlushInterval(TimeValue.timeValueSeconds(bulkFlushPeriod)) + .setBackoffPolicy( + BackoffPolicy.constantBackoff( + TimeValue.timeValueSeconds(retryInterval), numRetries)) + .build(); + + initBulkProcessors(threadCount, processorSupplier); + + log.info("Initialized {} async bulk processors for parallel execution", threadCount); } @Override @@ -602,43 +619,46 @@ public void generateBulkProcessor( int bulkRequestsLimit, long bulkFlushPeriod, long retryInterval, - int numRetries) { - bulkProcessor = - BulkProcessor.builder( - (request, bulkListener) -> { - try { - BulkResponse response = client.bulk(request, RequestOptions.DEFAULT); - bulkListener.onResponse(response); - } catch (IOException e) { - bulkListener.onFailure(e); - throw new RuntimeException(e); - } - }, - BulkListener.getInstance(writeRequestRefreshPolicy, metricUtils)) - .setBulkActions(bulkRequestsLimit) - 
.setFlushInterval(TimeValue.timeValueSeconds(bulkFlushPeriod)) - // This retry is ONLY for "resource constraints", i.e. 429 errors (each request has - // other - // retry methods) - .setBackoffPolicy( - BackoffPolicy.constantBackoff( - TimeValue.timeValueSeconds(retryInterval), numRetries)) - .build(); + int numRetries, + int threadCount) { + Supplier processorSupplier = + () -> + BulkProcessor.builder( + (request, bulkListener) -> { + try { + BulkResponse response = client.bulk(request, RequestOptions.DEFAULT); + bulkListener.onResponse(response); + } catch (IOException e) { + bulkListener.onFailure(e); + throw new RuntimeException(e); + } + }, + BulkListener.getInstance(0, writeRequestRefreshPolicy, metricUtils)) + .setBulkActions(bulkRequestsLimit) + .setFlushInterval(TimeValue.timeValueSeconds(bulkFlushPeriod)) + .setBackoffPolicy( + BackoffPolicy.constantBackoff( + TimeValue.timeValueSeconds(retryInterval), numRetries)) + .build(); + + initBulkProcessors(threadCount, processorSupplier); + + log.info("Initialized {} bulk processors for parallel execution", threadCount); } @Override - public void addBulk(DocWriteRequest writeRequest) { - bulkProcessor.add(writeRequest); + protected void addToProcessor(BulkProcessor processor, DocWriteRequest writeRequest) { + processor.add(writeRequest); } @Override - public void flushBulkProcessor() { - bulkProcessor.flush(); + protected void flushProcessor(BulkProcessor processor) { + processor.flush(); } @Override - public void closeBulkProcessor() { - bulkProcessor.close(); + protected void closeProcessor(BulkProcessor processor) { + processor.close(); } @Nonnull diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java index 835a9ccb15fb..c7a3bedb5224 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java 
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java @@ -1084,6 +1084,28 @@ private void setIndexSetting(String indexName, String value, String setting) thr _searchClient.updateIndexSettings(request, RequestOptions.DEFAULT); } + /** + * Sets the refresh interval for an index. + * + * @param indexName the name of the index + * @param refreshInterval the refresh interval value (e.g., "1s", "-1" for disabled) + * @throws IOException if there's an error communicating with Elasticsearch + */ + public void setIndexRefreshInterval(String indexName, String refreshInterval) throws IOException { + setIndexSetting(indexName, refreshInterval, INDEX_REFRESH_INTERVAL); + } + + /** + * Sets the replica count for an index. + * + * @param indexName the name of the index + * @param replicaCount the number of replicas + * @throws IOException if there's an error communicating with Elasticsearch + */ + public void setIndexReplicaCount(String indexName, int replicaCount) throws IOException { + setIndexSetting(indexName, String.valueOf(replicaCount), INDEX_NUMBER_OF_REPLICAS); + } + private Map submitReindex( String[] sourceIndices, String destinationIndex, @@ -1228,7 +1250,7 @@ private void createIndex(String indexName, ReindexConfig state) throws IOExcepti log.info("Created index {}", indexName); } - public static void cleanIndex( + public static void cleanOrphanedIndices( SearchClientShim searchClient, ElasticSearchConfiguration esConfig, ReindexConfig indexState) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/BulkListener.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/BulkListener.java index 83c4b3a24bda..efee1739b95c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/BulkListener.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/BulkListener.java @@ -14,15 +14,16 @@ @Slf4j 
public class BulkListener implements BulkProcessor.Listener { - private static final Map INSTANCES = new HashMap<>(); + private static final Map INSTANCES = new HashMap<>(); public static BulkListener getInstance(MetricUtils metricUtils) { - return INSTANCES.computeIfAbsent(null, p -> new BulkListener(p, metricUtils)); + return INSTANCES.computeIfAbsent("null", p -> new BulkListener(null, metricUtils)); } public static BulkListener getInstance( - WriteRequest.RefreshPolicy refreshPolicy, MetricUtils metricUtils) { - return INSTANCES.computeIfAbsent(refreshPolicy, p -> new BulkListener(p, metricUtils)); + int processorIndex, WriteRequest.RefreshPolicy refreshPolicy, MetricUtils metricUtils) { + String key = processorIndex + ":" + refreshPolicy; + return INSTANCES.computeIfAbsent(key, p -> new BulkListener(refreshPolicy, metricUtils)); } private final WriteRequest.RefreshPolicy refreshPolicy; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java index 0f27642222c4..d6c199005863 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java @@ -44,6 +44,7 @@ public static ESBulkProcessor.ESBulkProcessorBuilder builder( @Builder.Default private Integer bulkFlushPeriod = 1; @Builder.Default private Integer numRetries = 3; @Builder.Default private Long retryInterval = 1L; + @Builder.Default private Integer threadCount = 1; // Default to single processor @Builder.Default private TimeValue defaultTimeout = TimeValue.timeValueMinutes(1); @Getter private final WriteRequest.RefreshPolicy writeRequestRefreshPolicy; @@ -57,6 +58,7 @@ private ESBulkProcessor( Integer bulkFlushPeriod, Integer numRetries, Long retryInterval, + Integer threadCount, TimeValue defaultTimeout, 
WriteRequest.RefreshPolicy writeRequestRefreshPolicy, MetricUtils metricUtils) { @@ -67,6 +69,7 @@ private ESBulkProcessor( this.bulkFlushPeriod = bulkFlushPeriod; this.numRetries = numRetries; this.retryInterval = retryInterval; + this.threadCount = threadCount; this.defaultTimeout = defaultTimeout; this.writeRequestRefreshPolicy = writeRequestRefreshPolicy; if (async) { @@ -76,7 +79,8 @@ private ESBulkProcessor( bulkRequestsLimit, bulkFlushPeriod, retryInterval, - numRetries); + numRetries, + threadCount); } else { searchClient.generateBulkProcessor( writeRequestRefreshPolicy, @@ -84,7 +88,8 @@ private ESBulkProcessor( bulkRequestsLimit, bulkFlushPeriod, retryInterval, - numRetries); + numRetries, + threadCount); } this.metricUtils = metricUtils; } @@ -92,7 +97,7 @@ private ESBulkProcessor( public ESBulkProcessor add(DocWriteRequest request) { if (metricUtils != null) metricUtils.increment(this.getClass(), ES_WRITES_METRIC, 1); searchClient.addBulk(request); - log.info( + log.debug( "Added request id: {}, operation type: {}, index: {}", request.id(), request.opType(), @@ -100,6 +105,27 @@ public ESBulkProcessor add(DocWriteRequest request) { return this; } + /** + * Add a request with URN-based routing for entity document consistency. This method routes all + * operations for the same URN to the same BulkProcessor to ensure consistent ordering and avoid + * conflicts when updating the same entity. 
+ * + * @param urn the URN of the entity being updated + * @param request the document write request + * @return this ESBulkProcessor instance + */ + public ESBulkProcessor add(@Nonnull String urn, @Nonnull DocWriteRequest request) { + if (metricUtils != null) metricUtils.increment(this.getClass(), ES_WRITES_METRIC, 1); + searchClient.addBulk(urn, request); + log.debug( + "Added URN-aware request urn: {}, id: {}, operation type: {}, index: {}", + urn, + request.id(), + request.opType(), + request.index()); + return this; + } + public Optional deleteByQuery( QueryBuilder queryBuilder, String... indices) { return deleteByQuery(queryBuilder, true, bulkRequestsLimit, defaultTimeout, indices); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESWriteDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESWriteDAO.java index 9f1b5f8f06cf..f2e54c72f4c1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESWriteDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESWriteDAO.java @@ -18,6 +18,7 @@ import javax.annotation.Nullable; import lombok.Builder; import lombok.Data; +import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.opensearch.action.delete.DeleteRequest; @@ -42,7 +43,7 @@ public class ESWriteDAO { private final ElasticSearchConfiguration config; private final SearchClientShim searchClient; - private final ESBulkProcessor bulkProcessor; + @Getter private final ESBulkProcessor bulkProcessor; /** Result of a delete by query operation */ @Data diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java index 68cf27b92240..24053199080d 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java @@ -198,6 +198,10 @@ public Optional transformAspect( final ObjectNode searchDocument = JsonNodeFactory.instance.objectNode(); searchDocument.put("urn", urn.toString()); + // Check if the entity has any searchable aspects + EntitySpec entitySpec = opContext.getEntityRegistry().getEntitySpec(urn.getEntityType()); + boolean entityHasSearchableAspects = !entitySpec.getSearchableFieldSpecs().isEmpty(); + if (!extractedSearchableFields.isEmpty() || !extractedSearchScoreFields.isEmpty() || !extractedSearchRefFields.isEmpty()) { @@ -211,7 +215,12 @@ public Optional transformAspect( extractedSearchScoreFields.forEach( (key, values) -> setSearchScoreValue(key, values, searchDocument, forDelete)); result = Optional.of(searchDocument); + } else if (entityHasSearchableAspects) { + // If entity has searchable aspects but current aspect has no searchable fields, + // still create a search document with just the URN + result = Optional.of(searchDocument); } + if (STRUCTURED_PROPERTIES_ASPECT_NAME.equals(aspectSpec.getName())) { setStructuredPropertiesSearchValue( opContext, new StructuredProperties(aspect.data()), searchDocument, forDelete); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java index bf5733173e4e..f55b8ca7792a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java @@ -45,6 +45,7 @@ import io.datahubproject.metadata.context.OperationContext; import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import 
java.util.HashSet; @@ -107,6 +108,12 @@ public UpdateGraphIndicesService( this.fineGrainedLineageNotAllowedForPlatforms = fineGrainedLineageNotAllowedForPlatforms; } + @Override + public void handleChangeEvents( + @Nonnull OperationContext opContext, @Nonnull Collection events) { + events.forEach(event -> handleChangeEvent(opContext, event)); + } + @Override public void handleChangeEvent( @Nonnull OperationContext opContext, @Nonnull final MetadataChangeLog event) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java index 967803321211..e05198a1c50e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java @@ -22,6 +22,8 @@ import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO; import com.linkedin.metadata.search.transformer.SearchDocumentTransformer; import com.linkedin.metadata.systemmetadata.SystemMetadataService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; @@ -34,6 +36,7 @@ import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Objects; @@ -113,6 +116,12 @@ public UpdateIndicesService( this.structuredPropertiesWriteEnabled = structuredPropertiesWriteEnabled; } + @Override + public void handleChangeEvents( + @Nonnull OperationContext opContext, @Nonnull Collection events) { + events.forEach(event -> handleChangeEvent(opContext, event)); + } + @Override public void handleChangeEvent( @Nonnull 
OperationContext opContext, @Nonnull final MetadataChangeLog event) { @@ -341,7 +350,7 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev } if (searchDocument.isEmpty()) { - log.info("Search document for urn: {} aspect: {} was empty", urn, aspect); + log.warn("Search document for urn: {} aspect: {} was empty", urn, aspect); return; } @@ -485,4 +494,23 @@ private void deleteSearchData( elasticSearchService.upsertDocument(opContext, entityName, searchDocument.get(), docId); } + + /** + * Flushes any pending operations in the bulk processor to ensure all data is written to + * Elasticsearch. This is particularly important for loadIndices operations where we want to + * ensure all data is persisted. + */ + public void flush() { + try { + // Access the bulk processor through the ElasticSearchService's ESWriteDAO + ESWriteDAO writeDAO = elasticSearchService.getEsWriteDAO(); + ESBulkProcessor bulkProcessor = writeDAO.getBulkProcessor(); + + bulkProcessor.flush(); + log.info("Successfully flushed bulk processor"); + } catch (Exception e) { + log.error("Failed to flush bulk processor", e); + throw new RuntimeException("Failed to flush bulk processor", e); + } + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/aspect/validation/PolicyFieldTypeValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/aspect/validation/PolicyFieldTypeValidatorTest.java new file mode 100644 index 000000000000..b47f2d5b09aa --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/aspect/validation/PolicyFieldTypeValidatorTest.java @@ -0,0 +1,366 @@ +package com.linkedin.metadata.aspect.validation; + +import static com.linkedin.metadata.Constants.*; +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.StringArray; +import com.linkedin.events.metadata.ChangeType; +import 
com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.policy.DataHubActorFilter; +import com.linkedin.policy.DataHubPolicyInfo; +import com.linkedin.policy.DataHubResourceFilter; +import com.linkedin.policy.PolicyMatchCondition; +import com.linkedin.policy.PolicyMatchCriterion; +import com.linkedin.policy.PolicyMatchCriterionArray; +import com.linkedin.policy.PolicyMatchFilter; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.test.metadata.aspect.batch.TestMCP; +import java.util.List; +import java.util.Set; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class PolicyFieldTypeValidatorTest { + private static final Urn TEST_POLICY_URN = UrnUtils.getUrn("urn:li:dataHubPolicy:test-policy"); + + private static final AspectPluginConfig TEST_PLUGIN_CONFIG = + AspectPluginConfig.builder() + .className(PolicyFieldTypeValidator.class.getName()) + .enabled(true) + .supportedOperations(List.of("CREATE", "CREATE_ENTITY", "UPSERT", "UPDATE", "PATCH")) + .supportedEntityAspectNames( + List.of( + new AspectPluginConfig.EntityAspectName( + POLICY_ENTITY_NAME, DATAHUB_POLICY_INFO_ASPECT_NAME))) + .build(); + + @Mock private RetrieverContext mockRetrieverContext; + + @Mock private AspectRetriever mockAspectRetriever; + + private EntityRegistry entityRegistry; + + private PolicyFieldTypeValidator validator; + + @BeforeMethod + public void setup() { + MockitoAnnotations.openMocks(this); + entityRegistry = new TestEntityRegistry(); + validator = new PolicyFieldTypeValidator(); + validator.setConfig(TEST_PLUGIN_CONFIG); + when(mockRetrieverContext.getAspectRetriever()).thenReturn(mockAspectRetriever); + 
when(mockAspectRetriever.getEntityRegistry()).thenReturn(entityRegistry); + } + + @Test + public void testValidFieldTypeInFilter() { + DataHubPolicyInfo policyInfo = createPolicyInfoWithFilter("TYPE", "dataset"); + + assertEquals( + validator + .validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_POLICY_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_POLICY_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_POLICY_URN.getEntityType()) + .getAspectSpec(DATAHUB_POLICY_INFO_ASPECT_NAME)) + .recordTemplate(policyInfo) + .build()), + mockRetrieverContext, + null) + .count(), + 0, + "Expected validation to pass for valid field type TYPE"); + } + + @Test + public void testValidFieldTypeInPrivilegeConstraints() { + DataHubPolicyInfo policyInfo = createPolicyInfoWithPrivilegeConstraints("DOMAIN"); + + assertEquals( + validator + .validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_POLICY_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_POLICY_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_POLICY_URN.getEntityType()) + .getAspectSpec(DATAHUB_POLICY_INFO_ASPECT_NAME)) + .recordTemplate(policyInfo) + .build()), + mockRetrieverContext, + null) + .count(), + 0, + "Expected validation to pass for valid field type DOMAIN"); + } + + @Test + public void testInvalidFieldTypeInFilter() { + DataHubPolicyInfo policyInfo = createPolicyInfoWithFilter("INVALID_FIELD_TYPE", "somevalue"); + + assertEquals( + validator + .validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_POLICY_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_POLICY_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_POLICY_URN.getEntityType()) + .getAspectSpec(DATAHUB_POLICY_INFO_ASPECT_NAME)) + .recordTemplate(policyInfo) + .build()), + mockRetrieverContext, + null) + .count(), + 1, + "Expected validation to 
fail for invalid field type INVALID_FIELD_TYPE"); + } + + @Test + public void testInvalidFieldTypeInPrivilegeConstraints() { + DataHubPolicyInfo policyInfo = createPolicyInfoWithPrivilegeConstraints("INVALID_CONSTRAINT"); + + assertEquals( + validator + .validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_POLICY_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_POLICY_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_POLICY_URN.getEntityType()) + .getAspectSpec(DATAHUB_POLICY_INFO_ASPECT_NAME)) + .recordTemplate(policyInfo) + .build()), + mockRetrieverContext, + null) + .count(), + 1, + "Expected validation to fail for invalid field type INVALID_CONSTRAINT"); + } + + @Test + public void testMultipleValidFieldTypes() { + DataHubPolicyInfo policyInfo = createPolicyInfoWithMultipleFilters("TYPE", "DOMAIN"); + + assertEquals( + validator + .validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_POLICY_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_POLICY_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_POLICY_URN.getEntityType()) + .getAspectSpec(DATAHUB_POLICY_INFO_ASPECT_NAME)) + .recordTemplate(policyInfo) + .build()), + mockRetrieverContext, + null) + .count(), + 0, + "Expected validation to pass for multiple valid field types"); + } + + @Test + public void testMixedValidAndInvalidFieldTypes() { + DataHubPolicyInfo policyInfo = createPolicyInfoWithMultipleFilters("TYPE", "INVALID_FIELD"); + + assertEquals( + validator + .validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_POLICY_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_POLICY_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_POLICY_URN.getEntityType()) + .getAspectSpec(DATAHUB_POLICY_INFO_ASPECT_NAME)) + .recordTemplate(policyInfo) + .build()), + mockRetrieverContext, + null) + .count(), + 1, + 
"Expected validation to fail when at least one field type is invalid"); + } + + @Test + public void testDeprecatedFieldTypesStillValid() { + DataHubPolicyInfo policyInfo = + createPolicyInfoWithFilter("RESOURCE_URN", "urn:li:dataset:test"); + + assertEquals( + validator + .validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_POLICY_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_POLICY_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_POLICY_URN.getEntityType()) + .getAspectSpec(DATAHUB_POLICY_INFO_ASPECT_NAME)) + .recordTemplate(policyInfo) + .build()), + mockRetrieverContext, + null) + .count(), + 0, + "Expected validation to pass for deprecated but still valid field type RESOURCE_URN"); + } + + @Test + public void testPolicyWithoutResources() { + DataHubPolicyInfo policyInfo = createBasicPolicyInfo(); + + assertEquals( + validator + .validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_POLICY_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_POLICY_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_POLICY_URN.getEntityType()) + .getAspectSpec(DATAHUB_POLICY_INFO_ASPECT_NAME)) + .recordTemplate(policyInfo) + .build()), + mockRetrieverContext, + null) + .count(), + 0, + "Expected validation to pass for policy without resources"); + } + + @Test + public void testPolicyWithoutFilter() { + DataHubPolicyInfo policyInfo = createBasicPolicyInfo(); + DataHubResourceFilter resourceFilter = new DataHubResourceFilter(); + resourceFilter.setAllResources(true); + policyInfo.setResources(resourceFilter); + + assertEquals( + validator + .validateProposed( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(TEST_POLICY_URN) + .entitySpec(entityRegistry.getEntitySpec(TEST_POLICY_URN.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(TEST_POLICY_URN.getEntityType()) + 
.getAspectSpec(DATAHUB_POLICY_INFO_ASPECT_NAME)) + .recordTemplate(policyInfo) + .build()), + mockRetrieverContext, + null) + .count(), + 0, + "Expected validation to pass for policy without filter"); + } + + private DataHubPolicyInfo createBasicPolicyInfo() { + return new DataHubPolicyInfo() + .setActors(new DataHubActorFilter()) + .setEditable(true) + .setDescription("Test Policy") + .setDisplayName("Test Policy") + .setLastUpdatedTimestamp(123L) + .setPrivileges(new StringArray("EDIT_ENTITY")) + .setState("ACTIVE") + .setType("METADATA"); + } + + private DataHubPolicyInfo createPolicyInfoWithFilter(String fieldType, String value) { + DataHubPolicyInfo policyInfo = createBasicPolicyInfo(); + + PolicyMatchCriterion criterion = new PolicyMatchCriterion(); + criterion.setField(fieldType); + criterion.setValues(new StringArray(value)); + criterion.setCondition(PolicyMatchCondition.EQUALS); + + PolicyMatchFilter filter = new PolicyMatchFilter(); + filter.setCriteria(new PolicyMatchCriterionArray(criterion)); + + DataHubResourceFilter resourceFilter = new DataHubResourceFilter(); + resourceFilter.setAllResources(false); + resourceFilter.setFilter(filter); + + policyInfo.setResources(resourceFilter); + return policyInfo; + } + + private DataHubPolicyInfo createPolicyInfoWithPrivilegeConstraints(String fieldType) { + DataHubPolicyInfo policyInfo = createBasicPolicyInfo(); + + PolicyMatchCriterion criterion = new PolicyMatchCriterion(); + criterion.setField(fieldType); + criterion.setValues(new StringArray("urn:li:domain:engineering")); + criterion.setCondition(PolicyMatchCondition.EQUALS); + + PolicyMatchFilter filter = new PolicyMatchFilter(); + filter.setCriteria(new PolicyMatchCriterionArray(criterion)); + + DataHubResourceFilter resourceFilter = new DataHubResourceFilter(); + resourceFilter.setAllResources(false); + resourceFilter.setPrivilegeConstraints(filter); + + policyInfo.setResources(resourceFilter); + return policyInfo; + } + + private DataHubPolicyInfo 
createPolicyInfoWithMultipleFilters( + String fieldType1, String fieldType2) { + DataHubPolicyInfo policyInfo = createBasicPolicyInfo(); + + PolicyMatchCriterion criterion1 = new PolicyMatchCriterion(); + criterion1.setField(fieldType1); + criterion1.setValues(new StringArray("value1")); + criterion1.setCondition(PolicyMatchCondition.EQUALS); + + PolicyMatchCriterion criterion2 = new PolicyMatchCriterion(); + criterion2.setField(fieldType2); + criterion2.setValues(new StringArray("value2")); + criterion2.setCondition(PolicyMatchCondition.EQUALS); + + PolicyMatchFilter filter = new PolicyMatchFilter(); + filter.setCriteria(new PolicyMatchCriterionArray(criterion1, criterion2)); + + DataHubResourceFilter resourceFilter = new DataHubResourceFilter(); + resourceFilter.setAllResources(false); + resourceFilter.setFilter(filter); + + policyInfo.setResources(resourceFilter); + return policyInfo; + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/elasticsearch/update/BulkListenerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/elasticsearch/update/BulkListenerTest.java index c0aa72f7f282..fedf12067d19 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/elasticsearch/update/BulkListenerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/elasticsearch/update/BulkListenerTest.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.elasticsearch.update; -import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -18,24 +17,28 @@ public class BulkListenerTest { @Test public void testConstructor() { - BulkListener test = BulkListener.getInstance(mock(MetricUtils.class)); + MetricUtils metricUtils = mock(MetricUtils.class); + BulkListener test = + BulkListener.getInstance(0, WriteRequest.RefreshPolicy.IMMEDIATE, metricUtils); assertNotNull(test); - assertEquals(test, BulkListener.getInstance(mock(MetricUtils.class))); + 
assertEquals( + test, BulkListener.getInstance(0, WriteRequest.RefreshPolicy.IMMEDIATE, metricUtils)); assertNotEquals( - test, - BulkListener.getInstance(WriteRequest.RefreshPolicy.IMMEDIATE, mock(MetricUtils.class))); + test, BulkListener.getInstance(1, WriteRequest.RefreshPolicy.IMMEDIATE, metricUtils)); } @Test public void testDefaultPolicy() { - BulkListener test = BulkListener.getInstance(mock(MetricUtils.class)); + MetricUtils metricUtils = mock(MetricUtils.class); + BulkListener test = + BulkListener.getInstance(0, WriteRequest.RefreshPolicy.IMMEDIATE, metricUtils); BulkRequest mockRequest1 = mock(BulkRequest.class); test.beforeBulk(0L, mockRequest1); - verify(mockRequest1, times(0)).setRefreshPolicy(any(WriteRequest.RefreshPolicy.class)); + verify(mockRequest1, times(1)).setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); BulkRequest mockRequest2 = mock(BulkRequest.class); - test = BulkListener.getInstance(WriteRequest.RefreshPolicy.IMMEDIATE, mock(MetricUtils.class)); + test = BulkListener.getInstance(0, WriteRequest.RefreshPolicy.IMMEDIATE, metricUtils); test.beforeBulk(0L, mockRequest2); verify(mockRequest2, times(1)).setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/EbeanAspectDaoTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/EbeanAspectDaoTest.java index 7e8d825161e9..8ad11e5e2408 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/EbeanAspectDaoTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/ebean/EbeanAspectDaoTest.java @@ -117,4 +117,30 @@ public void testbatchGetForUpdate() throws JsonProcessingException { assertTrue( sql.get(0).contains("FOR UPDATE;"), String.format("Did not find `for update` in %s ", sql)); } + + @Test + public void testStreamAspectBatchesWithIsolationLevel() { + // Test the new overloaded method with isolation level parameter + var args = new 
com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs(); + args.limit = 10; + + // Test with READ_UNCOMMITTED isolation level + var stream = + testDao.streamAspectBatches(args, io.ebean.annotation.TxIsolation.READ_UNCOMMITTED); + assertTrue(stream != null); + + // Test with null isolation level (should use default) + var defaultStream = testDao.streamAspectBatches(args, null); + assertTrue(defaultStream != null); + } + + @Test + public void testStreamAspectBatchesDefault() { + // Test the original method still works + var args = new com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs(); + args.limit = 5; + + var stream = testDao.streamAspectBatches(args); + assertTrue(stream != null); + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAORelationshipGroupQueryTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAORelationshipGroupQueryTest.java index 86d40d5ddf39..19a58b0e1c79 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAORelationshipGroupQueryTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAORelationshipGroupQueryTest.java @@ -33,6 +33,10 @@ import org.mockito.ArgumentCaptor; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; +import org.opensearch.action.search.CreatePitRequest; +import org.opensearch.action.search.CreatePitResponse; +import org.opensearch.action.search.DeletePitRequest; +import org.opensearch.action.search.DeletePitResponse; import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchResponse; import org.opensearch.client.RequestOptions; @@ -57,6 +61,24 @@ public void setup() { mockClient = mock(SearchClientShim.class); when(mockClient.getEngineType()).thenReturn(SearchClientShim.SearchEngineType.OPENSEARCH_2); + // Mock PIT operations + CreatePitResponse mockCreatePitResponse = 
mock(CreatePitResponse.class); + when(mockCreatePitResponse.getId()).thenReturn("test-pit-id"); + try { + when(mockClient.createPit(any(CreatePitRequest.class), eq(RequestOptions.DEFAULT))) + .thenReturn(mockCreatePitResponse); + } catch (IOException e) { + // This should not happen in tests + } + + DeletePitResponse mockDeletePitResponse = mock(DeletePitResponse.class); + try { + when(mockClient.deletePit(any(DeletePitRequest.class), eq(RequestOptions.DEFAULT))) + .thenReturn(mockDeletePitResponse); + } catch (IOException e) { + // This should not happen in tests + } + // Create configuration with timeout and batch settings GraphQueryConfiguration graphConfig = GraphQueryConfiguration.builder() @@ -64,6 +86,7 @@ public void setup() { .batchSize(25) .enableMultiPathSearch(true) .boostViaNodes(true) + .maxThreads(1) // Ensure valid thread count for GraphQueryPITDAO .build(); LimitConfig limitConfig = @@ -575,6 +598,7 @@ public SearchResponse answer(InvocationOnMock invocation) throws Throwable { .batchSize(25) .enableMultiPathSearch(true) // Enable multiple paths .queryOptimization(true) + .maxThreads(1) // Ensure valid thread count for GraphQueryPITDAO .build(); ElasticSearchConfiguration testESConfig = @@ -604,6 +628,7 @@ public SearchResponse answer(InvocationOnMock invocation) throws Throwable { .timeoutSeconds(10) .batchSize(25) .enableMultiPathSearch(false) // Disable multiple paths + .maxThreads(1) // Ensure valid thread count for GraphQueryPITDAO .build(); ElasticSearchConfiguration testSinglePathConfig = @@ -1079,7 +1104,13 @@ public void testExploredFlagWithEmptyCurrentLevel() throws IOException { .batchSize(25) .enableMultiPathSearch(true) .pointInTimeCreationEnabled(true) - .impact(ImpactConfiguration.builder().maxRelations(1000).maxHops(10).build()) + .maxThreads(1) // Ensure valid thread count for GraphQueryPITDAO + .impact( + ImpactConfiguration.builder() + .maxRelations(1000) + .maxHops(10) + .keepAlive("5m") + .build()) .build(); 
ElasticSearchConfiguration testESConfig = @@ -1143,7 +1174,13 @@ public void testExploredFlagWithNullRelationships() throws IOException { .batchSize(25) .enableMultiPathSearch(true) .pointInTimeCreationEnabled(true) - .impact(ImpactConfiguration.builder().maxRelations(1000).maxHops(10).build()) + .maxThreads(1) // Ensure valid thread count for GraphQueryPITDAO + .impact( + ImpactConfiguration.builder() + .maxRelations(1000) + .maxHops(10) + .keepAlive("5m") + .build()) .build(); ElasticSearchConfiguration testESConfig = diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAOTest.java index 8025fda31184..4a82fd9590b0 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAOTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAOTest.java @@ -6,6 +6,7 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; import static org.testng.Assert.assertThrows; import static org.testng.Assert.assertTrue; @@ -14,6 +15,8 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.metadata.config.graph.GraphServiceConfiguration; import com.linkedin.metadata.config.search.ElasticSearchConfiguration; +import com.linkedin.metadata.config.search.GraphQueryConfiguration; +import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.graph.GraphFilters; import com.linkedin.metadata.graph.LineageGraphFilters; import com.linkedin.metadata.query.filter.SortCriterion; @@ -58,6 +61,16 @@ public void setUp() { mockSortCriteria = Arrays.asList(mock(SortCriterion.class)); mockSearchRequest = mock(SearchRequest.class); mockSearchResponse = mock(SearchResponse.class); + + // Configure nested mock objects 
for ElasticSearchConfiguration + SearchConfiguration mockSearchConfig = mock(SearchConfiguration.class); + GraphQueryConfiguration mockGraphQueryConfig = mock(GraphQueryConfiguration.class); + + when(mockElasticSearchConfig.getSearch()).thenReturn(mockSearchConfig); + when(mockSearchConfig.getGraph()).thenReturn(mockGraphQueryConfig); + + // Configure GraphQueryConfiguration with valid values for thread pool creation + when(mockGraphQueryConfig.getMaxThreads()).thenReturn(1); } @Test @@ -397,4 +410,79 @@ public void testGetSearchResponseAdvancedWithNullParameters() { .getSearchResponse( mockOperationContext, mockGraphFilters, mockSortCriteria, "scroll123", "5m", null); } + + @Test + public void testDestroyWithGraphQueryPITDAO() throws Exception { + // Test destroy() method when delegate is GraphQueryPITDAO + SearchClientShim testClient = mock(SearchClientShim.class); + when(testClient.getEngineType()).thenReturn(SearchClientShim.SearchEngineType.OPENSEARCH_2); + + // Create a real GraphQueryPITDAO as delegate + GraphQueryPITDAO pitDAO = + new GraphQueryPITDAO(testClient, mockGraphServiceConfig, mockElasticSearchConfig, null); + + // Create ESGraphQueryDAO with GraphQueryPITDAO as delegate + ESGraphQueryDAO dao = + new ESGraphQueryDAO(testClient, mockGraphServiceConfig, mockElasticSearchConfig, null); + + // Use reflection to set the delegate to our GraphQueryPITDAO + java.lang.reflect.Field delegateField = ESGraphQueryDAO.class.getDeclaredField("delegate"); + delegateField.setAccessible(true); + delegateField.set(dao, pitDAO); + + // Verify the delegate is a GraphQueryPITDAO + GraphQueryBaseDAO actualDelegate = (GraphQueryBaseDAO) delegateField.get(dao); + assertTrue(actualDelegate instanceof GraphQueryPITDAO); + + // Call destroy() + dao.destroy(); + + // Verify that the pitExecutor is shutdown + assertTrue(pitDAO.pitExecutor.isShutdown(), "PIT executor should be shutdown after destroy()"); + assertTrue( + pitDAO.pitExecutor.isTerminated(), "PIT executor should be 
terminated after destroy()"); + } + + @Test + public void testDestroyWithNonGraphQueryPITDAO() throws Exception { + // Test destroy() method when delegate is NOT GraphQueryPITDAO + SearchClientShim testClient = mock(SearchClientShim.class); + when(testClient.getEngineType()).thenReturn(SearchClientShim.SearchEngineType.ELASTICSEARCH_7); + + // Create ESGraphQueryDAO (which will have GraphQueryElasticsearch7DAO as delegate) + ESGraphQueryDAO dao = + new ESGraphQueryDAO(testClient, mockGraphServiceConfig, mockElasticSearchConfig, null); + + // Verify the delegate is NOT a GraphQueryPITDAO + java.lang.reflect.Field delegateField = ESGraphQueryDAO.class.getDeclaredField("delegate"); + delegateField.setAccessible(true); + GraphQueryBaseDAO actualDelegate = (GraphQueryBaseDAO) delegateField.get(dao); + assertFalse(actualDelegate instanceof GraphQueryPITDAO); + + // Call destroy() - should not throw exception + dao.destroy(); + + // Test passes if no exception is thrown + } + + @Test + public void testDestroyWithNullDelegate() throws Exception { + // Test destroy() method when delegate is null + SearchClientShim testClient = mock(SearchClientShim.class); + when(testClient.getEngineType()).thenReturn(SearchClientShim.SearchEngineType.OPENSEARCH_2); + + // Create ESGraphQueryDAO + ESGraphQueryDAO dao = + new ESGraphQueryDAO(testClient, mockGraphServiceConfig, mockElasticSearchConfig, null); + + // Use reflection to set the delegate to null + java.lang.reflect.Field delegateField = ESGraphQueryDAO.class.getDeclaredField("delegate"); + delegateField.setAccessible(true); + delegateField.set(dao, null); + + // Call destroy() - should not throw exception + dao.destroy(); + + // Test passes if no exception is thrown + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/GraphQueryPITDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/GraphQueryPITDAOTest.java index 0851ef048734..ba9cb4118765 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/GraphQueryPITDAOTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/GraphQueryPITDAOTest.java @@ -15,9 +15,11 @@ import static io.datahubproject.test.search.SearchTestUtils.TEST_OS_SEARCH_CONFIG; import static io.datahubproject.test.search.SearchTestUtils.TEST_OS_SEARCH_CONFIG_NO_PIT; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.atLeast; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -56,6 +58,8 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; import org.apache.lucene.search.TotalHits; import org.mockito.ArgumentCaptor; import org.opensearch.action.search.ClearScrollRequest; @@ -72,6 +76,7 @@ import org.opensearch.search.builder.SearchSourceBuilder; import org.skyscreamer.jsonassert.JSONAssert; import org.testng.Assert; +import org.testng.annotations.AfterMethod; import org.testng.annotations.Test; public class GraphQueryPITDAOTest { @@ -80,6 +85,39 @@ public class GraphQueryPITDAOTest { "elasticsearch/sample_filters/lineage_query_filters_limited.json"; private static final String TEST_QUERY_FILE_FULL = "elasticsearch/sample_filters/lineage_query_filters_full.json"; + + // Track created DAOs for cleanup + private final List createdDAOs = new ArrayList<>(); + + /** Create a GraphQueryPITDAO and track it for cleanup */ + private GraphQueryPITDAO createTrackedDAO(SearchClientShim client) { + return createTrackedDAO(client, TEST_GRAPH_SERVICE_CONFIG, TEST_OS_SEARCH_CONFIG); + } + + /** Create a GraphQueryPITDAO with custom configs and track it for cleanup */ + 
private GraphQueryPITDAO createTrackedDAO( + SearchClientShim client, + GraphServiceConfiguration graphConfig, + ElasticSearchConfiguration esConfig) { + GraphQueryPITDAO dao = new GraphQueryPITDAO(client, graphConfig, esConfig, null); + createdDAOs.add(dao); + return dao; + } + + @AfterMethod + public void cleanup() { + // Shutdown all created DAOs to prevent thread pool leaks + for (GraphQueryPITDAO dao : createdDAOs) { + try { + dao.shutdown(); + } catch (Exception e) { + // Log but don't fail the test + System.err.println("Failed to shutdown DAO: " + e.getMessage()); + } + } + createdDAOs.clear(); + } + private static final String TEST_QUERY_FILE_FULL_EMPTY_FILTERS = "elasticsearch/sample_filters/lineage_query_filters_full_empty_filters.json"; private static final String TEST_QUERY_FILE_FULL_MULTIPLE_FILTERS = @@ -1941,4 +1979,277 @@ public void testScrollSearchWithKeepAliveConfiguration() throws Exception { Assert.assertFalse(hasMessageInChain(e, "Point-in-Time creation is required")); } } + + @Test + public void testSearchSingleSliceWithPitThreadInterruption() throws Exception { + // Test that thread interruption is properly handled in searchSingleSliceWithPit + SearchClientShim mockClient = mock(SearchClientShim.class); + when(mockClient.getEngineType()).thenReturn(SearchClientShim.SearchEngineType.OPENSEARCH_2); + + GraphQueryPITDAO dao = createTrackedDAO(mockClient); + + // Mock PIT creation + CreatePitResponse mockPitResponse = mock(CreatePitResponse.class); + when(mockPitResponse.getId()).thenReturn("test_pit_id"); + when(mockClient.createPit(any(CreatePitRequest.class), eq(RequestOptions.DEFAULT))) + .thenReturn(mockPitResponse); + + // Create a thread that will be interrupted + Thread testThread = + new Thread( + () -> { + try { + Urn sourceUrn = UrnUtils.getUrn("urn:li:dataset:test-urn"); + LineageGraphFilters filters = + LineageGraphFilters.forEntityType( + operationContext.getLineageRegistry(), + DATASET_ENTITY_NAME, + LineageDirection.DOWNSTREAM); 
+ + // Start the search operation + dao.getImpactLineage(operationContext, sourceUrn, filters, 1); + } catch (Exception e) { + // Expected to throw exception due to interruption + } + }); + + // Start the thread and then interrupt it + testThread.start(); + + // Give the thread a moment to start, then interrupt it + Thread.sleep(100); + testThread.interrupt(); + + // Wait for the thread to complete + testThread.join(5000); + + // Verify that the thread completed (either successfully or with exception) + Assert.assertFalse(testThread.isAlive(), "Test thread should have completed"); + } + + @Test + public void testSearchSingleSliceWithPitThreadInterruptionException() throws Exception { + // Test that the specific RuntimeException is thrown when thread is interrupted + SearchClientShim mockClient = mock(SearchClientShim.class); + when(mockClient.getEngineType()).thenReturn(SearchClientShim.SearchEngineType.OPENSEARCH_2); + + GraphQueryPITDAO dao = createTrackedDAO(mockClient); + + // Mock PIT creation + CreatePitResponse mockPitResponse = mock(CreatePitResponse.class); + when(mockPitResponse.getId()).thenReturn("test_pit_id"); + when(mockClient.createPit(any(CreatePitRequest.class), eq(RequestOptions.DEFAULT))) + .thenReturn(mockPitResponse); + + // Create a mock search response that will cause the method to enter the loop + SearchHit[] hits = createFakeLineageHits(1, "urn:li:dataset:test-urn", "dest", "DownstreamOf"); + SearchResponse searchResponse = createFakeSearchResponse(hits, 1); + + // Mock search to return the response, then throw interruption exception + when(mockClient.search(any(SearchRequest.class), eq(RequestOptions.DEFAULT))) + .thenAnswer( + invocation -> { + // Simulate thread interruption by checking Thread.currentThread().isInterrupted() + if (Thread.currentThread().isInterrupted()) { + throw new RuntimeException("Slice 0 was interrupted"); + } + return searchResponse; + }); + + Urn sourceUrn = UrnUtils.getUrn("urn:li:dataset:test-urn"); + 
LineageGraphFilters filters = + LineageGraphFilters.forEntityType( + operationContext.getLineageRegistry(), + DATASET_ENTITY_NAME, + LineageDirection.DOWNSTREAM); + + // Create a thread that will be interrupted + final RuntimeException[] caughtException = new RuntimeException[1]; + Thread testThread = + new Thread( + () -> { + try { + dao.getImpactLineage(operationContext, sourceUrn, filters, 1); + } catch (RuntimeException e) { + caughtException[0] = e; + } + }); + + // Start the thread and then interrupt it + testThread.start(); + + // Give the thread a moment to start, then interrupt it + Thread.sleep(100); + testThread.interrupt(); + + // Wait for the thread to complete + testThread.join(5000); + + // Verify that the specific interruption exception was caught + Assert.assertNotNull( + caughtException[0], "Expected RuntimeException to be thrown due to interruption"); + Assert.assertTrue( + caughtException[0].getMessage().contains("Failed to execute slice-based search"), + "Expected slice-based search failure message, got: " + caughtException[0].getMessage()); + } + + @Test + public void testShutdown() throws Exception { + // Test that shutdown method properly terminates the thread pool + SearchClientShim mockClient = mock(SearchClientShim.class); + when(mockClient.getEngineType()).thenReturn(SearchClientShim.SearchEngineType.OPENSEARCH_2); + + GraphQueryPITDAO dao = createTrackedDAO(mockClient); + + // Verify thread pool is running + Assert.assertFalse(dao.pitExecutor.isShutdown(), "Thread pool should be running"); + + // Call shutdown + dao.shutdown(); + + // Verify thread pool is shutdown + Assert.assertTrue(dao.pitExecutor.isShutdown(), "Thread pool should be shutdown"); + Assert.assertTrue(dao.pitExecutor.isTerminated(), "Thread pool should be terminated"); + } + + @Test + public void testShutdownWithForcedTermination() throws Exception { + // Test shutdown when graceful termination fails and forced shutdown is needed + SearchClientShim mockClient = 
mock(SearchClientShim.class); + when(mockClient.getEngineType()).thenReturn(SearchClientShim.SearchEngineType.OPENSEARCH_2); + + GraphQueryPITDAO dao = createTrackedDAO(mockClient); + + // Create a custom ExecutorService that simulates graceful shutdown failure + ExecutorService mockExecutor = mock(ExecutorService.class); + when(mockExecutor.isShutdown()).thenReturn(false); + when(mockExecutor.awaitTermination(30, TimeUnit.SECONDS)) + .thenReturn(false); // Graceful shutdown fails + when(mockExecutor.awaitTermination(10, TimeUnit.SECONDS)) + .thenReturn(true); // Forced shutdown succeeds + + // Replace the executor with our mock + java.lang.reflect.Field executorField = GraphQueryPITDAO.class.getDeclaredField("pitExecutor"); + executorField.setAccessible(true); + executorField.set(dao, mockExecutor); + + // Call shutdown + dao.shutdown(); + + // Verify that shutdownNow was called + verify(mockExecutor).shutdown(); + verify(mockExecutor).shutdownNow(); + verify(mockExecutor).awaitTermination(30, TimeUnit.SECONDS); + verify(mockExecutor).awaitTermination(10, TimeUnit.SECONDS); + } + + @Test + public void testShutdownWithFailedForcedTermination() throws Exception { + // Test shutdown when both graceful and forced termination fail + SearchClientShim mockClient = mock(SearchClientShim.class); + when(mockClient.getEngineType()).thenReturn(SearchClientShim.SearchEngineType.OPENSEARCH_2); + + GraphQueryPITDAO dao = createTrackedDAO(mockClient); + + // Create a custom ExecutorService that simulates both graceful and forced shutdown failure + ExecutorService mockExecutor = mock(ExecutorService.class); + when(mockExecutor.isShutdown()).thenReturn(false); + when(mockExecutor.awaitTermination(30, TimeUnit.SECONDS)) + .thenReturn(false); // Graceful shutdown fails + when(mockExecutor.awaitTermination(10, TimeUnit.SECONDS)) + .thenReturn(false); // Forced shutdown also fails + + // Replace the executor with our mock + java.lang.reflect.Field executorField = 
GraphQueryPITDAO.class.getDeclaredField("pitExecutor"); + executorField.setAccessible(true); + executorField.set(dao, mockExecutor); + + // Call shutdown + dao.shutdown(); + + // Verify that shutdownNow was called and both awaitTermination calls were made + verify(mockExecutor).shutdown(); + verify(mockExecutor).shutdownNow(); + verify(mockExecutor).awaitTermination(30, TimeUnit.SECONDS); + verify(mockExecutor).awaitTermination(10, TimeUnit.SECONDS); + } + + @Test + public void testShutdownWithInterruptedException() throws Exception { + // Test shutdown when interrupted during awaitTermination + SearchClientShim mockClient = mock(SearchClientShim.class); + when(mockClient.getEngineType()).thenReturn(SearchClientShim.SearchEngineType.OPENSEARCH_2); + + GraphQueryPITDAO dao = createTrackedDAO(mockClient); + + // Create a custom ExecutorService that throws InterruptedException + ExecutorService mockExecutor = mock(ExecutorService.class); + when(mockExecutor.isShutdown()).thenReturn(false); + when(mockExecutor.awaitTermination(30, TimeUnit.SECONDS)) + .thenThrow(new InterruptedException("Test interruption")); + + // Replace the executor with our mock + java.lang.reflect.Field executorField = GraphQueryPITDAO.class.getDeclaredField("pitExecutor"); + executorField.setAccessible(true); + executorField.set(dao, mockExecutor); + + // Call shutdown + dao.shutdown(); + + // Verify that shutdownNow was called and thread was interrupted + verify(mockExecutor).shutdown(); + verify(mockExecutor).shutdownNow(); + verify(mockExecutor).awaitTermination(30, TimeUnit.SECONDS); + + // Verify that the current thread was interrupted + Assert.assertTrue(Thread.currentThread().isInterrupted(), "Thread should be interrupted"); + + // Clear the interrupt flag for other tests + Thread.interrupted(); + } + + @Test + public void testShutdownWhenAlreadyShutdown() throws Exception { + // Test shutdown when executor is already shutdown + SearchClientShim mockClient = mock(SearchClientShim.class); 
+ when(mockClient.getEngineType()).thenReturn(SearchClientShim.SearchEngineType.OPENSEARCH_2); + + GraphQueryPITDAO dao = createTrackedDAO(mockClient); + + // Create a custom ExecutorService that is already shutdown + ExecutorService mockExecutor = mock(ExecutorService.class); + when(mockExecutor.isShutdown()).thenReturn(true); + + // Replace the executor with our mock + java.lang.reflect.Field executorField = GraphQueryPITDAO.class.getDeclaredField("pitExecutor"); + executorField.setAccessible(true); + executorField.set(dao, mockExecutor); + + // Call shutdown + dao.shutdown(); + + // Verify that no shutdown methods were called since it's already shutdown + verify(mockExecutor, never()).shutdown(); + verify(mockExecutor, never()).shutdownNow(); + verify(mockExecutor, never()).awaitTermination(anyLong(), any(TimeUnit.class)); + } + + @Test + public void testShutdownWhenExecutorIsNull() throws Exception { + // Test shutdown when executor is null + SearchClientShim mockClient = mock(SearchClientShim.class); + when(mockClient.getEngineType()).thenReturn(SearchClientShim.SearchEngineType.OPENSEARCH_2); + + GraphQueryPITDAO dao = createTrackedDAO(mockClient); + + // Set executor to null + java.lang.reflect.Field executorField = GraphQueryPITDAO.class.getDeclaredField("pitExecutor"); + executorField.setAccessible(true); + executorField.set(dao, null); + + // Call shutdown - should not throw exception + dao.shutdown(); + + // Test passes if no exception is thrown + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchClientShimElasticsearchIntegrationTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchClientShimElasticsearchIntegrationTest.java index b1f90a5a3b4e..c2e5e8b1e948 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchClientShimElasticsearchIntegrationTest.java +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/SearchClientShimElasticsearchIntegrationTest.java @@ -709,7 +709,8 @@ public void testBulkProcessorOperations() throws IOException, InterruptedExcepti 10, // bulkRequestsLimit 5, // bulkFlushPeriod in seconds 1000, // retryInterval - 3 // numRetries + 3, // numRetries + 1 // threadCount ); // Add bulk requests diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/ESIndexBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/ESIndexBuilderTest.java index 5f1d67416f40..c9610e19691f 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/ESIndexBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/ESIndexBuilderTest.java @@ -371,7 +371,7 @@ void testCleanIndex() { // This should not throw an exception try { - ESIndexBuilder.cleanIndex(searchClient, elasticSearchConfiguration, indexState); + ESIndexBuilder.cleanOrphanedIndices(searchClient, elasticSearchConfiguration, indexState); // If we get here without exception, test passes assertTrue(true); } catch (Exception e) { @@ -636,7 +636,7 @@ void testCleanIndex_DeletesOrphanedIndices() throws Exception { .thenReturn(deleteResponse); // Execute - ESIndexBuilder.cleanIndex(searchClient, elasticSearchConfiguration, indexState); + ESIndexBuilder.cleanOrphanedIndices(searchClient, elasticSearchConfiguration, indexState); // Verify deletion was attempted verify(searchClient, atLeastOnce()) diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java index 2510e42403d3..cf522a6a7616 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java @@ -429,10 +429,20 @@ public void testEmptyDescription() throws RemoteInvocationException, URISyntaxEx String entityUrn = "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)"; SearchDocumentTransformer test = new SearchDocumentTransformer(1000, 1000, 1000); + OperationContext opContext = + TestOperationContexts.systemContextNoSearchAuthorization( + RetrieverContext.builder() + .aspectRetriever(mock(AspectRetriever.class)) + .cachingAspectRetriever( + TestOperationContexts.emptyActiveUsersAspectRetriever(() -> ENTITY_REGISTRY)) + .graphRetriever(mock(GraphRetriever.class)) + .searchRetriever(mock(SearchRetriever.class)) + .build()); + // editedDescription - empty string Optional transformed = test.transformAspect( - mock(OperationContext.class), + opContext, UrnUtils.getUrn(entityUrn), new EditableDatasetProperties().setDescription(""), ENTITY_REGISTRY @@ -449,7 +459,7 @@ public void testEmptyDescription() throws RemoteInvocationException, URISyntaxEx // description - empty string transformed = test.transformAspect( - mock(OperationContext.class), + opContext, UrnUtils.getUrn(entityUrn), new DatasetProperties().setDescription(""), ENTITY_REGISTRY @@ -487,7 +497,6 @@ public void testStructuredPropertiesTransform() // Mock AspectRetriever and OperationContext AspectRetriever aspectRetriever = Mockito.mock(AspectRetriever.class); - OperationContext opContext = Mockito.mock(OperationContext.class); // Mock the aspectRetriever to return a structured property definition Map> mockDefinitions = new HashMap<>(); @@ -503,10 +512,19 @@ public void testStructuredPropertiesTransform() aspectMap.put(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, structuredPropertyDefinitionAspect); mockDefinitions.put(UrnUtils.getUrn(structuredPropertyUrn), aspectMap); - Mockito.when(opContext.getAspectRetriever()).thenReturn(aspectRetriever); 
Mockito.when(aspectRetriever.getLatestAspectObjects(any(Set.class), any(Set.class))) .thenReturn(mockDefinitions); + OperationContext opContext = + TestOperationContexts.systemContextNoSearchAuthorization( + RetrieverContext.builder() + .aspectRetriever(aspectRetriever) + .cachingAspectRetriever( + TestOperationContexts.emptyActiveUsersAspectRetriever(() -> ENTITY_REGISTRY)) + .graphRetriever(mock(GraphRetriever.class)) + .searchRetriever(mock(SearchRetriever.class)) + .build()); + Optional transformed = test.transformAspect( opContext, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/service/UpdateIndicesServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/service/UpdateIndicesServiceTest.java index 0ad55edabdb3..cc30feaad3de 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/service/UpdateIndicesServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/service/UpdateIndicesServiceTest.java @@ -2,9 +2,12 @@ import static com.linkedin.metadata.Constants.CONTAINER_ASPECT_NAME; import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.nullable; import static org.mockito.Mockito.eq; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -80,4 +83,89 @@ public void testContainerHandleDeleteEvent() throws Exception { eq(event.getCreated())); verify(updateGraphIndicesService).handleChangeEvent(operationContext, event); } + + @Test + public void testHandleChangeEventsCollection() throws Exception { + // Test the new handleChangeEvents method that takes a Collection + Urn urn1 = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset1,PROD)"); + Urn urn2 = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset2,PROD)"); + + // Create 
proper aspect data + com.linkedin.container.Container container1 = new com.linkedin.container.Container(); + container1.setContainer(UrnUtils.getUrn("urn:li:container:container1")); + + com.linkedin.container.Container container2 = new com.linkedin.container.Container(); + container2.setContainer(UrnUtils.getUrn("urn:li:container:container2")); + + MetadataChangeLog event1 = new MetadataChangeLog(); + event1.setChangeType(ChangeType.CREATE); + event1.setEntityUrn(urn1); + event1.setAspectName(CONTAINER_ASPECT_NAME); + event1.setEntityType(urn1.getEntityType()); + event1.setAspect(com.linkedin.metadata.utils.GenericRecordUtils.serializeAspect(container1)); + event1.setSystemMetadata(SystemMetadataUtils.createDefaultSystemMetadata()); + event1.setCreated(AuditStampUtils.createDefaultAuditStamp()); + + MetadataChangeLog event2 = new MetadataChangeLog(); + event2.setChangeType(ChangeType.UPSERT); + event2.setEntityUrn(urn2); + event2.setAspectName(CONTAINER_ASPECT_NAME); + event2.setEntityType(urn2.getEntityType()); + event2.setAspect(com.linkedin.metadata.utils.GenericRecordUtils.serializeAspect(container2)); + event2.setSystemMetadata(SystemMetadataUtils.createDefaultSystemMetadata()); + event2.setCreated(AuditStampUtils.createDefaultAuditStamp()); + + java.util.Collection events = java.util.List.of(event1, event2); + + // Execute batch processing + updateIndicesService.handleChangeEvents(operationContext, events); + + // Verify both events were processed + verify(updateGraphIndicesService, times(2)) + .handleChangeEvent(eq(operationContext), any(MetadataChangeLog.class)); + } + + @Test + public void testEmptySearchDocumentLogging() throws Exception { + // Test the change from log.info to log.debug for empty search documents + Urn urn = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hdfs,EmptyDataset,PROD)"); + EntitySpec entitySpec = operationContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME); + AspectSpec aspectSpec = 
entitySpec.getAspectSpec(CONTAINER_ASPECT_NAME); + + // Create proper aspect data + com.linkedin.container.Container container = new com.linkedin.container.Container(); + container.setContainer(UrnUtils.getUrn("urn:li:container:empty")); + + MetadataChangeLog event = new MetadataChangeLog(); + event.setChangeType(ChangeType.CREATE); + event.setEntityUrn(urn); + event.setAspectName(CONTAINER_ASPECT_NAME); + event.setEntityType(urn.getEntityType()); + event.setAspect(com.linkedin.metadata.utils.GenericRecordUtils.serializeAspect(container)); + event.setSystemMetadata(SystemMetadataUtils.createDefaultSystemMetadata()); + event.setCreated(AuditStampUtils.createDefaultAuditStamp()); + + // Mock empty search document + when(searchDocumentTransformer.transformAspect( + eq(operationContext), + eq(urn), + nullable(RecordTemplate.class), + eq(aspectSpec), + eq(false), + eq(event.getCreated()))) + .thenReturn(java.util.Optional.empty()); + + // Execute - this should trigger the empty document logging + updateIndicesService.handleChangeEvent(operationContext, event); + + // Verify the method was called + verify(searchDocumentTransformer) + .transformAspect( + eq(operationContext), + eq(urn), + nullable(RecordTemplate.class), + eq(aspectSpec), + eq(false), + eq(event.getCreated())); + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/system_info/collectors/PropertiesCollectorConfigurationTest.java b/metadata-io/src/test/java/com/linkedin/metadata/system_info/collectors/PropertiesCollectorConfigurationTest.java index 73be1f977cb1..6a0b28d845f9 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/system_info/collectors/PropertiesCollectorConfigurationTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/system_info/collectors/PropertiesCollectorConfigurationTest.java @@ -553,6 +553,9 @@ public PropertiesCollector propertiesCollector(Environment environment) { "elasticsearch.bulkProcessor.numRetries", "elasticsearch.bulkProcessor.refreshPolicy", 
"elasticsearch.bulkProcessor.requestsLimit", + "elasticsearch.bulkProcessor.sizeLimit", + "elasticsearch.bulkProcessor.threadCount", + "elasticsearch.dataNodeCount", "elasticsearch.bulkProcessor.retryInterval", "elasticsearch.connectionRequestTimeout", "elasticsearch.host", diff --git a/metadata-io/src/testFixtures/java/com/linkedin/metadata/EbeanTestUtils.java b/metadata-io/src/testFixtures/java/com/linkedin/metadata/EbeanTestUtils.java index ed5c882ace23..a38309321e96 100644 --- a/metadata-io/src/testFixtures/java/com/linkedin/metadata/EbeanTestUtils.java +++ b/metadata-io/src/testFixtures/java/com/linkedin/metadata/EbeanTestUtils.java @@ -15,6 +15,18 @@ public static Database createTestServer(String instanceId) { return DatabaseFactory.create(createTestingH2ServerConfig(instanceId)); } + @Nonnull + public static Database createNamedTestServer(String instanceId, String serverName) { + DatabaseConfig config = createTestingH2ServerConfig(instanceId); + config.setName(serverName); + config.setDefaultServer(false); // Explicitly set as non-default to avoid conflicts + + // Add the required entity packages for DataHub + config.getPackages().add("com.linkedin.metadata.entity.ebean"); + + return DatabaseFactory.create(config); + } + @Nonnull private static DatabaseConfig createTestingH2ServerConfig(String instanceId) { DataSourceConfig dataSourceConfig = new DataSourceConfig(); diff --git a/metadata-io/src/testFixtures/java/io/datahubproject/test/search/BulkProcessorTestUtils.java b/metadata-io/src/testFixtures/java/io/datahubproject/test/search/BulkProcessorTestUtils.java index 3543ae823d2c..579444030165 100644 --- a/metadata-io/src/testFixtures/java/io/datahubproject/test/search/BulkProcessorTestUtils.java +++ b/metadata-io/src/testFixtures/java/io/datahubproject/test/search/BulkProcessorTestUtils.java @@ -2,6 +2,7 @@ import co.elastic.clients.elasticsearch._helpers.bulk.BulkIngester; import co.elastic.clients.elasticsearch._helpers.bulk.BulkListener; +import 
com.linkedin.metadata.search.elasticsearch.client.shim.impl.AbstractBulkProcessorShim; import com.linkedin.metadata.search.elasticsearch.client.shim.impl.Es8SearchClientShim; import com.linkedin.metadata.search.elasticsearch.client.shim.impl.OpenSearch2SearchClientShim; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; @@ -22,11 +23,12 @@ public static void syncAfterWrite(ESBulkProcessor bulkProcessor) throws InterruptedException, IOException { bulkProcessor.flush(); final SearchClientShim searchClient = getRestHighLevelClient(bulkProcessor); + // if the bulks are big it takes time for Elastic/OpenSearch to process these bulk requests if (searchClient instanceof OpenSearch2SearchClientShim) { - getBulkProcessorListener(bulkProcessor).waitForBulkProcessed(); + getBulkProcessorListener((AbstractBulkProcessorShim) searchClient).waitForBulkProcessed(); } else if (searchClient instanceof Es8SearchClientShim) { - getBulkListener(bulkProcessor).waitForBulkProcessed(); + getBulkListener((AbstractBulkProcessorShim) searchClient).waitForBulkProcessed(); } waitForCompletion(searchClient); // some tasks might have refresh = false, so we need to refresh manually @@ -55,37 +57,69 @@ private static SearchClientShim getRestHighLevelClient(ESBulkProcessor esBulk } private static BulkProcessorProxyListener getBulkProcessorListener( - ESBulkProcessor esBulkProcessor) { - var searchClient = - (SearchClientShim) ReflectionTestUtils.getField(esBulkProcessor, "searchClient"); - var bulkProcessor = ReflectionTestUtils.getField(searchClient, "bulkProcessor"); - var bulkRequestHandler = ReflectionTestUtils.getField(bulkProcessor, "bulkRequestHandler"); - return (BulkProcessorProxyListener) - ReflectionTestUtils.getField(bulkRequestHandler, "listener"); + AbstractBulkProcessorShim abstractShim) { + var bulkProcessors = ReflectionTestUtils.getField(abstractShim, "bulkProcessors"); + + if (bulkProcessors instanceof Object[]) { + Object[] processors = (Object[]) 
bulkProcessors; + if (processors.length > 0 && processors[0] instanceof BulkProcessor) { + BulkProcessor processor = (BulkProcessor) processors[0]; + var bulkRequestHandler = ReflectionTestUtils.getField(processor, "bulkRequestHandler"); + return (BulkProcessorProxyListener) + ReflectionTestUtils.getField(bulkRequestHandler, "listener"); + } + } + return null; } - private static ESBulkProcessorProxyListener getBulkListener(ESBulkProcessor esBulkProcessor) { - var searchClient = - (SearchClientShim) ReflectionTestUtils.getField(esBulkProcessor, "searchClient"); - var bulkProcessor = ReflectionTestUtils.getField(searchClient, "bulkProcessor"); - return (ESBulkProcessorProxyListener) ReflectionTestUtils.getField(bulkProcessor, "listener"); + private static ESBulkProcessorProxyListener getBulkListener( + AbstractBulkProcessorShim abstractShim) { + var bulkProcessors = ReflectionTestUtils.getField(abstractShim, "bulkProcessors"); + + if (bulkProcessors instanceof Object[]) { + Object[] processors = (Object[]) bulkProcessors; + if (processors.length > 0 && processors[0] instanceof BulkIngester) { + BulkIngester processor = (BulkIngester) processors[0]; + return (ESBulkProcessorProxyListener) ReflectionTestUtils.getField(processor, "listener"); + } + } + return null; } public static void replaceBulkProcessorListener(ESBulkProcessor esBulkProcessor) { var searchClient = (SearchClientShim) ReflectionTestUtils.getField(esBulkProcessor, "searchClient"); - var bulkProcessor = ReflectionTestUtils.getField(searchClient, "bulkProcessor"); - if (bulkProcessor instanceof BulkProcessor) { - var bulkRequestHandler = ReflectionTestUtils.getField(bulkProcessor, "bulkRequestHandler"); - var bulkProcessorListener = - (BulkProcessor.Listener) ReflectionTestUtils.getField(bulkRequestHandler, "listener"); - ReflectionTestUtils.setField( - bulkRequestHandler, "listener", new BulkProcessorProxyListener(bulkProcessorListener)); - } else if (bulkProcessor instanceof BulkIngester) { - var 
bulkProcessorListener = - (BulkListener) ReflectionTestUtils.getField(bulkProcessor, "listener"); - ReflectionTestUtils.setField( - bulkProcessor, "listener", new ESBulkProcessorProxyListener(bulkProcessorListener)); + + // Cast to AbstractBulkProcessorShim to access bulkProcessors field + if (searchClient instanceof AbstractBulkProcessorShim) { + replaceBulkProcessorListener((AbstractBulkProcessorShim) searchClient); + return; + } + + throw new IllegalStateException("Failed to replaceBulkProcessorListener"); + } + + public static void replaceBulkProcessorListener(AbstractBulkProcessorShim abstractShim) { + var bulkProcessors = ReflectionTestUtils.getField(abstractShim, "bulkProcessors"); + + if (bulkProcessors instanceof Object[]) { + Object[] processors = (Object[]) bulkProcessors; + for (Object processor : processors) { + if (processor instanceof BulkProcessor) { + var bulkRequestHandler = ReflectionTestUtils.getField(processor, "bulkRequestHandler"); + var bulkProcessorListener = + (BulkProcessor.Listener) ReflectionTestUtils.getField(bulkRequestHandler, "listener"); + ReflectionTestUtils.setField( + bulkRequestHandler, + "listener", + new BulkProcessorProxyListener(bulkProcessorListener)); + } else if (processor instanceof BulkIngester) { + var bulkProcessorListener = + (BulkListener) ReflectionTestUtils.getField(processor, "listener"); + ReflectionTestUtils.setField( + processor, "listener", new ESBulkProcessorProxyListener(bulkProcessorListener)); + } + } } } } diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/DataHubAppConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/DataHubAppConfiguration.java index cd91ceb5a32d..6213cbbee690 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/DataHubAppConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/DataHubAppConfiguration.java @@ -90,4 +90,7 @@ public 
class DataHubAppConfiguration { /** MCL Processing configurations */ private MCLProcessingConfiguration mclProcessing; + + /** Structured properties related configurations */ + private StructuredPropertiesConfiguration structuredProperties; } diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/StructuredPropertiesConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/StructuredPropertiesConfiguration.java new file mode 100644 index 000000000000..34fabb3cb4e4 --- /dev/null +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/StructuredPropertiesConfiguration.java @@ -0,0 +1,22 @@ +package com.linkedin.metadata.config; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@NoArgsConstructor +@AllArgsConstructor +@Builder(toBuilder = true) +public class StructuredPropertiesConfiguration { + + /** Whether structured properties mappings are applied */ + private boolean enabled; + + /** Whether structured property values can be written */ + private boolean writeEnabled; + + /** Whether structured property mappings are applied in system update job */ + private boolean systemUpdateEnabled; +} diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index c81aebec8e9e..95d89ba627d7 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -340,6 +340,7 @@ elasticsearch: opensearchUseAwsIamAuth: ${OPENSEARCH_USE_AWS_IAM_AUTH:false} region: ${AWS_REGION:#{null}} idHashAlgo: ${ELASTIC_ID_HASH_ALGO:MD5} + dataNodeCount: ${ELASTICSEARCH_DATA_NODE_COUNT:1} # Multi-client shim configuration shim: # Enable the search client shim (false = use legacy RestHighLevelClient) @@ -376,7 +377,7 @@ elasticsearch: enableBatchDelete: 
${ES_BULK_ENABLE_BATCH_DELETE:false} index: prefix: ${INDEX_PREFIX:} - numShards: ${ELASTICSEARCH_NUM_SHARDS_PER_INDEX:1} + numShards: ${ELASTICSEARCH_NUM_SHARDS_PER_INDEX:${elasticsearch.dataNodeCount}} numReplicas: ${ELASTICSEARCH_NUM_REPLICAS_PER_INDEX:1} numRetries: ${ELASTICSEARCH_INDEX_BUILDER_NUM_RETRIES:3} refreshIntervalSeconds: ${ELASTICSEARCH_INDEX_BUILDER_REFRESH_INTERVAL_SECONDS:3} # increase to 30 if expected indexing rates to be greater than 100/s @@ -435,9 +436,9 @@ elasticsearch: impact: maxHops: ${ELASTICSEARCH_SEARCH_GRAPH_IMPACT_MAX_HOPS:1000} # the maximum hops to traverse for impact analysis maxRelations: ${ELASTICSEARCH_SEARCH_GRAPH_IMPACT_MAX_RELATIONS:40000} # maximum number of relationships - slices: ${ELASTICSEARCH_SEARCH_GRAPH_IMPACT_SLICES:2} # number of slices for parallel search operations - keepAlive: ${ELASTICSEARCH_SEARCH_GRAPH_IMPACT_KEEP_ALIVE:5m} # Point-in-Time keepAlive duration for impact analysis queries - maxThreads: ${ELASTICSEARCH_SEARCH_GRAPH_IMPACT_MAX_THREADS:32} # maximum parallel lineage graph queries + slices: ${ELASTICSEARCH_SEARCH_GRAPH_IMPACT_SLICES:${elasticsearch.dataNodeCount}} # number of slices for parallel search operations + keepAlive: ${ELASTICSEARCH_SEARCH_GRAPH_IMPACT_KEEP_ALIVE:55s} # Point-in-Time keepAlive duration for impact analysis queries + maxThreads: ${ELASTICSEARCH_SEARCH_GRAPH_IMPACT_MAX_THREADS:16} # maximum parallel lineage graph queries queryOptimization: ${ELASTICSEARCH_SEARCH_GRAPH_QUERY_OPTIMIZATION:true} # reduce query nesting if possible # TODO: Kafka topic convention diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java index 31753b9fe9b9..305ef7b862ed 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java +++ 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/ElasticSearchGraphServiceFactory.java @@ -35,7 +35,8 @@ protected GraphService getInstance( final ConfigurationProvider configurationProvider, final EntityRegistry entityRegistry, @Value("${elasticsearch.idHashAlgo}") final String idHashAlgo, - MetricUtils metricUtils) { + MetricUtils metricUtils, + @Qualifier("esGraphQueryDAO") final ESGraphQueryDAO esGraphQueryDAO) { LineageRegistry lineageRegistry = new LineageRegistry(entityRegistry); return new ElasticSearchGraphService( lineageRegistry, @@ -46,12 +47,19 @@ protected GraphService getInstance( components.getBulkProcessor(), components.getConfig().getBulkProcessor().getNumRetries(), configurationProvider.getElasticSearch().getSearch().getGraph()), - new ESGraphQueryDAO( - components.getSearchClient(), - configurationProvider.getGraphService(), - configurationProvider.getElasticSearch(), - metricUtils), + esGraphQueryDAO, components.getIndexBuilder(), idHashAlgo); } + + @Bean(name = "esGraphQueryDAO") + @Nonnull + protected ESGraphQueryDAO createESGraphQueryDAO( + final ConfigurationProvider configurationProvider, MetricUtils metricUtils) { + return new ESGraphQueryDAO( + components.getSearchClient(), + configurationProvider.getGraphService(), + configurationProvider.getElasticSearch(), + metricUtils); + } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java index 9b9f4996463c..d06df4032aad 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java @@ -29,9 +29,6 @@ public class ConfigurationProvider extends DataHubAppConfiguration { /** Configuration for the health check server */ private HealthCheckConfiguration 
healthCheck; - /** Structured properties related configurations */ - private StructuredPropertiesConfiguration structuredProperties; - /** Enable/disable DataHub analytics */ private PlatformAnalyticsConfiguration platformAnalytics; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/StructuredPropertiesConfiguration.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/StructuredPropertiesConfiguration.java deleted file mode 100644 index 6d4d4ea30c86..000000000000 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/StructuredPropertiesConfiguration.java +++ /dev/null @@ -1,10 +0,0 @@ -package com.linkedin.gms.factory.config; - -import lombok.Data; - -@Data -public class StructuredPropertiesConfiguration { - private boolean enabled; - private boolean writeEnabled; - private boolean systemUpdateEnabled; -} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java index 0231e7d4884c..316dc8f6f56b 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java @@ -17,6 +17,7 @@ import com.linkedin.metadata.aspect.validation.CreateIfNotExistsValidator; import com.linkedin.metadata.aspect.validation.ExecutionRequestResultValidator; import com.linkedin.metadata.aspect.validation.FieldPathValidator; +import com.linkedin.metadata.aspect.validation.PolicyFieldTypeValidator; import com.linkedin.metadata.aspect.validation.PrivilegeConstraintsValidator; import com.linkedin.metadata.aspect.validation.SystemPolicyValidator; import com.linkedin.metadata.aspect.validation.UrnAnnotationValidator; @@ -561,4 +562,21 @@ public 
AspectPayloadValidator systemPolicyValidator(ConfigurationProvider config .build())) .build()); } + + @Bean + public AspectPayloadValidator policyFieldTypeValidator() { + return new PolicyFieldTypeValidator() + .setConfig( + AspectPluginConfig.builder() + .className(PolicyFieldTypeValidator.class.getName()) + .enabled(true) + .supportedOperations(List.of(CREATE, CREATE_ENTITY, UPSERT, UPDATE)) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(POLICY_ENTITY_NAME) + .aspectName(DATAHUB_POLICY_INFO_ASPECT_NAME) + .build())) + .build()); + } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactory.java index ee1b3e60877f..5e2527d95cf4 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchBulkProcessorFactory.java @@ -40,6 +40,9 @@ public class ElasticSearchBulkProcessorFactory { @Value("${elasticsearch.bulkProcessor.refreshPolicy}") private String refreshPolicy; + @Value("${elasticsearch.threadCount}") + private Integer threadCount; + @Bean(name = "elasticSearchBulkProcessor") @Nonnull protected ESBulkProcessor getInstance(MetricUtils metricUtils) { @@ -49,6 +52,7 @@ protected ESBulkProcessor getInstance(MetricUtils metricUtils) { .bulkRequestsLimit(bulkRequestsLimit) .retryInterval(retryInterval) .numRetries(numRetries) + .threadCount(threadCount) .batchDelete(enableBatchDelete) .writeRequestRefreshPolicy(WriteRequest.RefreshPolicy.valueOf(refreshPolicy)) .build(); diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/views/ViewServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/views/ViewServiceFactory.java 
similarity index 92% rename from metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/views/ViewServiceFactory.java rename to metadata-service/factories/src/main/java/com/linkedin/gms/factory/views/ViewServiceFactory.java index d9423bec7026..9a9510405ac9 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/views/ViewServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/views/ViewServiceFactory.java @@ -1,4 +1,4 @@ -package com.linkedin.gms.factory.search.views; +package com.linkedin.gms.factory.views; import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.metadata.service.ViewService; diff --git a/metadata-service/restli-api/build.gradle b/metadata-service/restli-api/build.gradle index 38a3e9298ff9..00ed0488efb5 100644 --- a/metadata-service/restli-api/build.gradle +++ b/metadata-service/restli-api/build.gradle @@ -20,5 +20,8 @@ dependencies { restClientCompile(externalDependency.grpcProtobuf) { because("CVE-2023-1428, CVE-2023-32731") } + restClientCompile(externalDependency.grpcNettyShaded) { + because("Security vulnerability in grpc-netty-shaded 1.68.3") + } } } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/SearchIndicesService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/SearchIndicesService.java index dad7e14cfcdf..15856085a8b7 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/SearchIndicesService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/SearchIndicesService.java @@ -2,9 +2,13 @@ import com.linkedin.mxe.MetadataChangeLog; import io.datahubproject.metadata.context.OperationContext; +import java.util.Collection; import javax.annotation.Nonnull; public interface SearchIndicesService { void handleChangeEvent( @Nonnull OperationContext opContext, @Nonnull MetadataChangeLog metadataChangeLog); + + void handleChangeEvents( 
+ @Nonnull OperationContext opContext, @Nonnull Collection events); } diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/UrnValidationUtil.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/UrnValidationUtil.java index 194ff222f7b9..a8b7037a3fc3 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/UrnValidationUtil.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/UrnValidationUtil.java @@ -74,7 +74,7 @@ public static void validateUrn( "Simple URN %s contains comma character which is not allowed in non-tuple URNs", urn)); } else { - log.error( + log.warn( "Simple URN {} contains comma character which is not allowed in non-tuple URNs", urn); } } @@ -91,7 +91,7 @@ public static void validateUrn( if (strict) { throw new IllegalArgumentException(message); } else { - log.error(message); + log.warn(message); } } diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/SearchClientShim.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/SearchClientShim.java index 4999bac10fb3..eca17845e989 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/SearchClientShim.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/elasticsearch/SearchClientShim.java @@ -317,7 +317,8 @@ void generateAsyncBulkProcessor( int bulkRequestsLimit, long bulkFlushPeriod, long retryInterval, - int numRetries); + int numRetries, + int threadCount); void generateBulkProcessor( WriteRequest.RefreshPolicy writeRequestRefreshPolicy, @@ -325,10 +326,13 @@ void generateBulkProcessor( int bulkRequestsLimit, long bulkFlushPeriod, long retryInterval, - int numRetries); + int numRetries, + int threadCount); void addBulk(DocWriteRequest writeRequest); + void addBulk(String urn, DocWriteRequest writeRequest); + void flushBulkProcessor(); void closeBulkProcessor(); diff --git a/smoke-test/CLAUDE.MD b/smoke-test/CLAUDE.MD new file mode 100644 index 
000000000000..a3e3ca155ed7 --- /dev/null +++ b/smoke-test/CLAUDE.MD @@ -0,0 +1,42 @@ +# Smoke Test Guidelines + +## Test Principles + +- Test should be idempotent + - should not depend on an empty state of the system + - should not depend on the order in which they run + - should not depend on another test doing their cleanup + +## Common Utilities + +### Core Utilities (`tests/utils.py`) + +- **`execute_graphql(auth_session, query, variables)`** - Execute GraphQL queries with standard error handling +- **`ingest_file_via_rest(auth_session, file_path)`** - Ingest metadata from JSON file +- **`delete_urns_from_file(graph_client, file_path)`** - Clean up entities from JSON file +- **`get_sleep_info()`** - Get retry timing for eventual consistency +- **`wait_for_writes_to_sync()`** - Wait for async operations to complete + +### GraphQL Pattern + +```python +from tests.utils import execute_graphql +from typing import Any, Dict + +query = """query getDataset($urn: String!) { dataset(urn: $urn) { name } }""" +variables: Dict[str, Any] = {"urn": dataset_urn} +res_data = execute_graphql(auth_session, query, variables) +assert res_data["data"]["dataset"]["name"] == "expected" +``` + +### Fixture Pattern + +```python +@pytest.fixture(scope="module", autouse=True) +def ingest_cleanup_data(auth_session, graph_client): + print("ingesting test data") + ingest_file_via_rest(auth_session, "tests/my_test/data.json") + yield + print("removing test data") + delete_urns_from_file(graph_client, "tests/my_test/data.json") +``` \ No newline at end of file diff --git a/smoke-test/test_e2e.py b/smoke-test/test_e2e.py index 004cc6ac6ef3..0ed19240a32f 100644 --- a/smoke-test/test_e2e.py +++ b/smoke-test/test_e2e.py @@ -15,6 +15,7 @@ pytestmark = pytest.mark.no_cypress_suite1 from tests.utils import ( + execute_graphql, get_kafka_broker_url, get_kafka_schema_registry, get_sleep_info, @@ -60,23 +61,17 @@ def _ensure_user_present(auth_session, urn: str): 
stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) def _ensure_user_relationship_present(auth_session, urn, relationships): - json = { - "query": """query corpUser($urn: String!) {\n - corpUser(urn: $urn) {\n - urn\n - relationships(input: { types: ["IsMemberOfNativeGroup"], direction: OUTGOING, start: 0, count: 1 }) {\n - total\n - }\n - }\n - }""", - "variables": {"urn": urn}, - } - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - res_data = response.json() + query = """query corpUser($urn: String!) { + corpUser(urn: $urn) { + urn + relationships(input: { types: ["IsMemberOfNativeGroup"], direction: OUTGOING, start: 0, count: 1 }) { + total + } + } + }""" + variables = {"urn": urn} + res_data = execute_graphql(auth_session, query, variables) - assert res_data - assert res_data["data"] assert res_data["data"]["corpUser"] assert res_data["data"]["corpUser"]["relationships"] assert res_data["data"]["corpUser"]["relationships"]["total"] == relationships @@ -109,23 +104,17 @@ def _ensure_dataset_present( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) def _ensure_group_not_present(auth_session, urn: str) -> Any: - json = { - "query": """query corpGroup($urn: String!) {\n - corpGroup(urn: $urn) {\n - urn\n - properties {\n - displayName\n - }\n - }\n - }""", - "variables": {"urn": urn}, - } - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - res_data = response.json() + query = """query corpGroup($urn: String!) 
{ + corpGroup(urn: $urn) { + urn + properties { + displayName + } + } + }""" + variables = {"urn": urn} + res_data = execute_graphql(auth_session, query, variables) - assert res_data - assert res_data["data"] assert res_data["data"]["corpGroup"] assert res_data["data"]["corpGroup"]["properties"] is None @@ -338,33 +327,25 @@ def test_frontend_auth(auth_session): def test_frontend_browse_datasets(auth_session): + query = """query browse($input: BrowseInput!) { + browse(input: $input) { + start + count + total + groups { + name + } + entities { + ... on Dataset { + urn + name + } + } + } + }""" + variables = {"input": {"type": "DATASET", "path": ["prod"]}} + res_data = execute_graphql(auth_session, query, variables) - json = { - "query": """query browse($input: BrowseInput!) {\n - browse(input: $input) {\n - start\n - count\n - total\n - groups { - name - } - entities {\n - ... on Dataset {\n - urn\n - name\n - }\n - }\n - }\n - }""", - "variables": {"input": {"type": "DATASET", "path": ["prod"]}}, - } - - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - - response.raise_for_status() - res_data = response.json() - assert res_data - assert res_data["data"] assert res_data["data"]["browse"] assert len(res_data["data"]["browse"]["entities"]) == 0 assert len(res_data["data"]["browse"]["groups"]) > 0 @@ -379,34 +360,26 @@ def test_frontend_browse_datasets(auth_session): ], ) def test_frontend_search_datasets(auth_session, query, min_expected_results): - - json = { - "query": """query search($input: SearchInput!) {\n - search(input: $input) {\n - start\n - count\n - total\n - searchResults {\n - entity {\n - ... on Dataset {\n - urn\n - name\n - }\n - }\n - }\n - }\n - }""", - "variables": { - "input": {"type": "DATASET", "query": f"{query}", "start": 0, "count": 10} - }, + graphql_query = """query search($input: SearchInput!) { + search(input: $input) { + start + count + total + searchResults { + entity { + ... 
on Dataset { + urn + name + } + } + } + } + }""" + variables = { + "input": {"type": "DATASET", "query": f"{query}", "start": 0, "count": 10} } + res_data = execute_graphql(auth_session, graphql_query, variables) - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] assert res_data["data"]["search"] assert res_data["data"]["search"]["total"] >= min_expected_results assert len(res_data["data"]["search"]["searchResults"]) >= min_expected_results @@ -421,34 +394,26 @@ def test_frontend_search_datasets(auth_session, query, min_expected_results): ], ) def test_frontend_search_across_entities(auth_session, query, min_expected_results): - - json = { - "query": """query searchAcrossEntities($input: SearchAcrossEntitiesInput!) {\n - searchAcrossEntities(input: $input) {\n - start\n - count\n - total\n - searchResults {\n - entity {\n - ... on Dataset {\n - urn\n - name\n - }\n - }\n - }\n - }\n - }""", - "variables": { - "input": {"types": [], "query": f"{query}", "start": 0, "count": 10} - }, + graphql_query = """query searchAcrossEntities($input: SearchAcrossEntitiesInput!) { + searchAcrossEntities(input: $input) { + start + count + total + searchResults { + entity { + ... 
on Dataset { + urn + name + } + } + } + } + }""" + variables = { + "input": {"types": [], "query": f"{query}", "start": 0, "count": 10} } + res_data = execute_graphql(auth_session, graphql_query, variables) - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] assert res_data["data"]["searchAcrossEntities"] assert res_data["data"]["searchAcrossEntities"]["total"] >= min_expected_results assert ( @@ -458,32 +423,25 @@ def test_frontend_search_across_entities(auth_session, query, min_expected_resul def test_frontend_user_info(auth_session): - urn = get_root_urn() - json = { - "query": """query corpUser($urn: String!) {\n - corpUser(urn: $urn) {\n - urn\n - username\n - editableInfo {\n - pictureLink\n - }\n - info {\n - firstName\n - fullName\n - title\n - email\n - }\n - }\n - }""", - "variables": {"urn": urn}, - } - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - res_data = response.json() + query = """query corpUser($urn: String!) { + corpUser(urn: $urn) { + urn + username + editableInfo { + pictureLink + } + info { + firstName + fullName + title + email + } + } + }""" + variables = {"urn": urn} + res_data = execute_graphql(auth_session, query, variables) - assert res_data - assert res_data["data"] assert res_data["data"]["corpUser"] assert res_data["data"]["corpUser"]["urn"] == urn @@ -507,31 +465,24 @@ def test_frontend_user_info(auth_session): ) def test_frontend_datasets(auth_session, platform, dataset_name, env): urn = f"urn:li:dataset:({platform},{dataset_name},{env})" - json = { - "query": """query getDataset($urn: String!) {\n - dataset(urn: $urn) {\n - urn\n - name\n - description\n - platform {\n - urn\n - }\n - schemaMetadata {\n - name\n - version\n - createdAt\n - }\n - }\n - }""", - "variables": {"urn": urn}, - } - # Basic dataset info. 
- response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - res_data = response.json() + query = """query getDataset($urn: String!) { + dataset(urn: $urn) { + urn + name + description + platform { + urn + } + schemaMetadata { + name + version + createdAt + } + } + }""" + variables = {"urn": urn} + res_data = execute_graphql(auth_session, query, variables) - assert res_data - assert res_data["data"] assert res_data["data"]["dataset"] assert res_data["data"]["dataset"]["urn"] == urn assert res_data["data"]["dataset"]["name"] == dataset_name @@ -628,81 +579,65 @@ def test_ingest_without_system_metadata(auth_session): def test_frontend_app_config(auth_session): + query = """query appConfig { + appConfig { + analyticsConfig { + enabled + } + policiesConfig { + enabled + platformPrivileges { + type + displayName + description + } + resourcePrivileges { + resourceType + resourceTypeDisplayName + entityType + privileges { + type + displayName + description + } + } + } + } + }""" + res_data = execute_graphql(auth_session, query) - json = { - "query": """query appConfig {\n - appConfig {\n - analyticsConfig {\n - enabled\n - }\n - policiesConfig {\n - enabled\n - platformPrivileges {\n - type\n - displayName\n - description\n - }\n - resourcePrivileges {\n - resourceType\n - resourceTypeDisplayName\n - entityType\n - privileges {\n - type\n - displayName\n - description\n - }\n - }\n - }\n - }\n - }""" - } - - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] assert res_data["data"]["appConfig"] assert res_data["data"]["appConfig"]["analyticsConfig"]["enabled"] is True assert res_data["data"]["appConfig"]["policiesConfig"]["enabled"] is True def test_frontend_me_query(auth_session): + query = """query me { + me { + corpUser { + urn + username + editableInfo { + 
pictureLink + } + info { + firstName + fullName + title + email + } + } + platformPrivileges { + viewAnalytics + managePolicies + manageIdentities + manageUserCredentials + generatePersonalAccessTokens + } + } + }""" + res_data = execute_graphql(auth_session, query) - json = { - "query": """query me {\n - me {\n - corpUser {\n - urn\n - username\n - editableInfo {\n - pictureLink\n - }\n - info {\n - firstName\n - fullName\n - title\n - email\n - }\n - }\n - platformPrivileges {\n - viewAnalytics - managePolicies - manageIdentities - manageUserCredentials - generatePersonalAccessTokens - }\n - }\n - }""" - } - - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] assert res_data["data"]["me"]["corpUser"]["urn"] == get_root_urn() assert res_data["data"]["me"]["platformPrivileges"]["viewAnalytics"] is True assert res_data["data"]["me"]["platformPrivileges"]["managePolicies"] is True @@ -715,36 +650,29 @@ def test_frontend_me_query(auth_session): def test_list_users(auth_session): - - json = { - "query": """query listUsers($input: ListUsersInput!) {\n - listUsers(input: $input) {\n - start\n - count\n - total\n - users {\n - urn\n - type\n - username\n - properties {\n - firstName - }\n - }\n - }\n - }""", - "variables": { - "input": { - "start": 0, - "count": 2, + query = """query listUsers($input: ListUsersInput!) 
{ + listUsers(input: $input) { + start + count + total + users { + urn + type + username + properties { + firstName + } } - }, + } + }""" + variables = { + "input": { + "start": 0, + "count": 2, + } } - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, query, variables) - assert res_data - assert res_data["data"] assert res_data["data"]["listUsers"] assert res_data["data"]["listUsers"]["start"] == 0 assert res_data["data"]["listUsers"]["count"] == 2 @@ -755,36 +683,29 @@ def test_list_users(auth_session): @pytest.mark.dependency() def test_list_groups(auth_session): - - json = { - "query": """query listGroups($input: ListGroupsInput!) {\n - listGroups(input: $input) {\n - start\n - count\n - total\n - groups {\n - urn\n - type\n - name\n - properties {\n - displayName - }\n - }\n - }\n - }""", - "variables": { - "input": { - "start": 0, - "count": 2, + query = """query listGroups($input: ListGroupsInput!) { + listGroups(input: $input) { + start + count + total + groups { + urn + type + name + properties { + displayName + } } - }, + } + }""" + variables = { + "input": { + "start": 0, + "count": 2, + } } - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, query, variables) - assert res_data - assert res_data["data"] assert res_data["data"]["listGroups"] assert res_data["data"]["listGroups"]["start"] == 0 assert res_data["data"]["listGroups"]["count"] == 2 @@ -797,60 +718,45 @@ def test_list_groups(auth_session): depends=["test_list_groups"] ) def test_add_remove_members_from_group(auth_session): - # Assert no group edges for user jdoe - json = { - "query": """query corpUser($urn: String!) 
{\n - corpUser(urn: $urn) {\n - urn\n - relationships(input: { types: ["IsMemberOfNativeGroup"], direction: OUTGOING, start: 0, count: 1 }) {\n - total\n - }\n - }\n - }""", - "variables": {"urn": "urn:li:corpuser:jdoe"}, - } - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - res_data = response.json() + query = """query corpUser($urn: String!) { + corpUser(urn: $urn) { + urn + relationships(input: { types: ["IsMemberOfNativeGroup"], direction: OUTGOING, start: 0, count: 1 }) { + total + } + } + }""" + variables = {"urn": "urn:li:corpuser:jdoe"} + res_data = execute_graphql(auth_session, query, variables) - assert res_data - assert res_data["data"] assert res_data["data"]["corpUser"] assert res_data["data"]["corpUser"]["relationships"]["total"] == 0 # Add jdoe to group - json = { - "query": """mutation addGroupMembers($input: AddGroupMembersInput!) {\n - addGroupMembers(input: $input) }""", - "variables": { - "input": { - "groupUrn": "urn:li:corpGroup:bfoo", - "userUrns": ["urn:li:corpuser:jdoe"], - } - }, + mutation = """mutation addGroupMembers($input: AddGroupMembersInput!) { + addGroupMembers(input: $input) }""" + variables = { + "input": { + "groupUrn": "urn:li:corpGroup:bfoo", + "userUrns": ["urn:li:corpuser:jdoe"], + } } - - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() + execute_graphql(auth_session, mutation, variables) # Verify the member has been added _ensure_user_relationship_present(auth_session, "urn:li:corpuser:jdoe", 1) # Now remove jdoe from the group - json = { - "query": """mutation removeGroupMembers($input: RemoveGroupMembersInput!) {\n - removeGroupMembers(input: $input) }""", - "variables": { - "input": { - "groupUrn": "urn:li:corpGroup:bfoo", - "userUrns": ["urn:li:corpuser:jdoe"], - } - }, + mutation = """mutation removeGroupMembers($input: RemoveGroupMembersInput!) 
{ + removeGroupMembers(input: $input) }""" + variables = { + "input": { + "groupUrn": "urn:li:corpGroup:bfoo", + "userUrns": ["urn:li:corpuser:jdoe"], + } } - - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() + execute_graphql(auth_session, mutation, variables) # Verify the member has been removed _ensure_user_relationship_present(auth_session, "urn:li:corpuser:jdoe", 0) @@ -862,29 +768,22 @@ def test_update_corp_group_properties(auth_session): group_urn = "urn:li:corpGroup:bfoo" # Update Corp Group Description - json = { - "query": """mutation updateCorpGroupProperties($urn: String!, $input: CorpGroupUpdateInput!) {\n - updateCorpGroupProperties(urn: $urn, input: $input) { urn } }""", - "variables": { - "urn": group_urn, - "input": { - "description": "My test description", - "slack": "test_group_slack", - "email": "test_group_email@email.com", - }, + query = """mutation updateCorpGroupProperties($urn: String!, $input: CorpGroupUpdateInput!) {\n + updateCorpGroupProperties(urn: $urn, input: $input) { urn } }""" + variables = { + "urn": group_urn, + "input": { + "description": "My test description", + "slack": "test_group_slack", + "email": "test_group_email@email.com", }, } - - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, query, variables) print(res_data) - assert "errors" not in res_data assert res_data["data"]["updateCorpGroupProperties"] is not None # Verify the description has been updated - json = { - "query": """query corpGroup($urn: String!) {\n + query = """query corpGroup($urn: String!) 
{\n corpGroup(urn: $urn) {\n urn\n editableProperties {\n @@ -893,15 +792,11 @@ def test_update_corp_group_properties(auth_session): email\n }\n }\n - }""", - "variables": {"urn": group_urn}, - } - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - res_data = response.json() + }""" + variables = {"urn": group_urn} + res_data = execute_graphql(auth_session, query, variables) assert res_data - assert "errors" not in res_data assert res_data["data"] assert res_data["data"]["corpGroup"] assert res_data["data"]["corpGroup"]["editableProperties"] @@ -912,17 +807,13 @@ def test_update_corp_group_properties(auth_session): } # Reset the editable properties - json = { - "query": """mutation updateCorpGroupProperties($urn: String!, $input: CorpGroupUpdateInput!) {\n - updateCorpGroupProperties(urn: $urn, input: $input) { urn } }""", - "variables": { - "urn": group_urn, - "input": {"description": "", "slack": "", "email": ""}, - }, + query = """mutation updateCorpGroupProperties($urn: String!, $input: CorpGroupUpdateInput!) {\n + updateCorpGroupProperties(urn: $urn, input: $input) { urn } }""" + variables = { + "urn": group_urn, + "input": {"description": "", "slack": "", "email": ""}, } - - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() + execute_graphql(auth_session, query, variables) @pytest.mark.dependency( @@ -935,39 +826,28 @@ def test_update_corp_group_description(auth_session): group_urn = "urn:li:corpGroup:bfoo" # Update Corp Group Description - json = { - "query": """mutation updateDescription($input: DescriptionUpdateInput!) {\n - updateDescription(input: $input) }""", - "variables": { - "input": {"description": "My test description", "resourceUrn": group_urn}, - }, + query = """mutation updateDescription($input: DescriptionUpdateInput!) 
{\n + updateDescription(input: $input) }""" + variables = { + "input": {"description": "My test description", "resourceUrn": group_urn}, } - - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, query, variables) print(res_data) - assert "errors" not in res_data assert res_data["data"]["updateDescription"] is True # Verify the description has been updated - json = { - "query": """query corpGroup($urn: String!) {\n + query = """query corpGroup($urn: String!) {\n corpGroup(urn: $urn) {\n urn\n editableProperties {\n description\n }\n }\n - }""", - "variables": {"urn": group_urn}, - } - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - res_data = response.json() + }""" + variables = {"urn": group_urn} + res_data = execute_graphql(auth_session, query, variables) assert res_data - assert "errors" not in res_data assert res_data["data"] assert res_data["data"]["corpGroup"] assert res_data["data"]["corpGroup"]["editableProperties"] @@ -977,16 +857,12 @@ def test_update_corp_group_description(auth_session): ) # Reset Corp Group Description - json = { - "query": """mutation updateDescription($input: DescriptionUpdateInput!) {\n - updateDescription(input: $input) }""", - "variables": { - "input": {"description": "", "resourceUrn": group_urn}, - }, + query = """mutation updateDescription($input: DescriptionUpdateInput!) 
{\n + updateDescription(input: $input) }""" + variables = { + "input": {"description": "", "resourceUrn": group_urn}, } - - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() + execute_graphql(auth_session, query, variables) @pytest.mark.dependency( @@ -997,32 +873,23 @@ def test_update_corp_group_description(auth_session): ) def test_remove_user(auth_session): - json = { - "query": """mutation removeUser($urn: String!) {\n - removeUser(urn: $urn) }""", - "variables": {"urn": "urn:li:corpuser:jdoe"}, - } + query = """mutation removeUser($urn: String!) {\n + removeUser(urn: $urn) }""" + variables = {"urn": "urn:li:corpuser:jdoe"} + execute_graphql(auth_session, query, variables) - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - - json = { - "query": """query corpUser($urn: String!) {\n + query = """query corpUser($urn: String!) {\n corpUser(urn: $urn) {\n urn\n properties {\n firstName\n }\n }\n - }""", - "variables": {"urn": "urn:li:corpuser:jdoe"}, - } - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - res_data = response.json() + }""" + variables = {"urn": "urn:li:corpuser:jdoe"} + res_data = execute_graphql(auth_session, query, variables) assert res_data - assert "errors" not in res_data assert res_data["data"] assert res_data["data"]["corpUser"] assert res_data["data"]["corpUser"]["properties"] is None @@ -1037,14 +904,10 @@ def test_remove_user(auth_session): def test_remove_group(auth_session): group_urn = "urn:li:corpGroup:bfoo" - json = { - "query": """mutation removeGroup($urn: String!) {\n - removeGroup(urn: $urn) }""", - "variables": {"urn": group_urn}, - } - - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() + query = """mutation removeGroup($urn: String!) 
{\n + removeGroup(urn: $urn) }""" + variables = {"urn": group_urn} + execute_graphql(auth_session, query, variables) _ensure_group_not_present(auth_session, group_urn) @@ -1057,35 +920,27 @@ def test_remove_group(auth_session): ) def test_create_group(auth_session): - json = { - "query": """mutation createGroup($input: CreateGroupInput!) {\n - createGroup(input: $input) }""", - "variables": { - "input": { - "id": "test-id", - "name": "Test Group", - "description": "My test group", - } - }, + query = """mutation createGroup($input: CreateGroupInput!) {\n + createGroup(input: $input) }""" + variables = { + "input": { + "id": "test-id", + "name": "Test Group", + "description": "My test group", + } } + execute_graphql(auth_session, query, variables) - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - - json = { - "query": """query corpGroup($urn: String!) {\n + query = """query corpGroup($urn: String!) {\n corpGroup(urn: $urn) {\n urn\n properties {\n displayName\n }\n }\n - }""", - "variables": {"urn": "urn:li:corpGroup:test-id"}, - } - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - res_data = response.json() + }""" + variables = {"urn": "urn:li:corpGroup:test-id"} + res_data = execute_graphql(auth_session, query, variables) assert res_data assert res_data["data"] diff --git a/smoke-test/test_rapid.py b/smoke-test/test_rapid.py index 9d898fa645dd..204bb3ffc49d 100644 --- a/smoke-test/test_rapid.py +++ b/smoke-test/test_rapid.py @@ -1,7 +1,9 @@ import pytest import tenacity +from typing import Any, Dict from tests.utils import ( + execute_graphql, ingest_file_via_rest, get_sleep_info, ) @@ -17,45 +19,39 @@ ) def _ensure_dataset_present_correctly(auth_session): urn = "urn:li:dataset:(urn:li:dataPlatform:testPlatform,testDataset,PROD)" - json = { - "query": """query getDataset($urn: String!) 
{\n - dataset(urn: $urn) {\n - urn\n - name\n - description\n - platform {\n - urn\n - }\n - schemaMetadata {\n - name\n - version\n - createdAt\n - }\n - outgoing: relationships(\n - input: { types: ["DownstreamOf", "Consumes", "Produces"], direction: OUTGOING, start: 0, count: 2000 }\n - ) {\n - start\n - count\n - total\n - relationships {\n - type\n - direction\n - entity {\n - urn\n - type\n - }\n - }\n - }\n - }\n - }""", - "variables": {"urn": urn}, - } - response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) - response.raise_for_status() - res_data = response.json() + query = """query getDataset($urn: String!) { + dataset(urn: $urn) { + urn + name + description + platform { + urn + } + schemaMetadata { + name + version + createdAt + } + outgoing: relationships( + input: { types: ["DownstreamOf", "Consumes", "Produces"], direction: OUTGOING, start: 0, count: 2000 } + ) { + start + count + total + relationships { + type + direction + entity { + urn + type + } + } + } + } + }""" + variables: Dict[str, Any] = {"urn": urn} + res_data = execute_graphql(auth_session, query, variables) - assert res_data - assert res_data["data"] assert res_data["data"]["dataset"] assert res_data["data"]["dataset"]["urn"] == urn assert len(res_data["data"]["dataset"]["outgoing"]["relationships"]) == 1 diff --git a/smoke-test/tests/audit_events/audit_events_test.py b/smoke-test/tests/audit_events/audit_events_test.py index 389692703c2b..4835c7b83784 100644 --- a/smoke-test/tests/audit_events/audit_events_test.py +++ b/smoke-test/tests/audit_events/audit_events_test.py @@ -125,15 +125,24 @@ def access_token_setup(auth_session, auth_exclude_filter): res_data = listAccessTokens(admin_session, filters=[auth_exclude_filter]) assert res_data assert res_data["data"] + + if res_data["data"]["listAccessTokens"]["tokens"]: + for metadata in res_data["data"]["listAccessTokens"]["tokens"]: + revokeAccessToken(admin_session, metadata["id"]) + 
wait_for_writes_to_sync() + + # Verify clean state after cleanup + res_data = listAccessTokens(admin_session, filters=[auth_exclude_filter]) assert res_data["data"]["listAccessTokens"]["total"] == 0 assert not res_data["data"]["listAccessTokens"]["tokens"] yield - # Clean up + # Clean up after the test res_data = listAccessTokens(admin_session, filters=[auth_exclude_filter]) for metadata in res_data["data"]["listAccessTokens"]["tokens"]: revokeAccessToken(admin_session, metadata["id"]) + wait_for_writes_to_sync() def test_audit_token_events(auth_exclude_filter): diff --git a/smoke-test/tests/browse/browse_test.py b/smoke-test/tests/browse/browse_test.py index bee0df5510c4..1281602fc07c 100644 --- a/smoke-test/tests/browse/browse_test.py +++ b/smoke-test/tests/browse/browse_test.py @@ -1,6 +1,8 @@ +from typing import Any, Dict + import pytest -from tests.utils import delete_urns_from_file, ingest_file_via_rest +from tests.utils import delete_urns_from_file, execute_graphql, ingest_file_via_rest TEST_DATASET_1_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,test-browse-1,PROD)" TEST_DATASET_2_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,test-browse-2,PROD)" @@ -20,7 +22,7 @@ def ingest_cleanup_data(graph_client, auth_session, request): def test_get_browse_paths(auth_session, ingest_cleanup_data): # Iterate through each browse path, starting with the root - get_browse_paths_query = """query browse($input: BrowseInput!) {\n + query = """query browse($input: BrowseInput!) 
{\n browse(input: $input) {\n total\n entities {\n @@ -38,51 +40,27 @@ def test_get_browse_paths(auth_session, ingest_cleanup_data): }""" # /prod -- There should be one entity - get_browse_paths_json = { - "query": get_browse_paths_query, - "variables": { - "input": {"type": "DATASET", "path": ["prod"], "start": 0, "count": 100} - }, + variables: Dict[str, Any] = { + "input": {"type": "DATASET", "path": ["prod"], "start": 0, "count": 100} } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=get_browse_paths_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["browse"] is not None - assert "errors" not in res_data + res_data = execute_graphql(auth_session, query, variables) browse = res_data["data"]["browse"] print(browse) assert browse["entities"] == [{"urn": TEST_DATASET_3_URN}] # /prod/kafka1 - get_browse_paths_json = { - "query": get_browse_paths_query, - "variables": { - "input": { - "type": "DATASET", - "path": ["prod", "kafka1"], - "start": 0, - "count": 10, - } - }, + variables = { + "input": { + "type": "DATASET", + "path": ["prod", "kafka1"], + "start": 0, + "count": 10, + } } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=get_browse_paths_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["browse"] is not None - assert "errors" not in res_data + res_data = execute_graphql(auth_session, query, variables) browse = res_data["data"]["browse"] assert browse == { @@ -97,28 +75,16 @@ def test_get_browse_paths(auth_session, ingest_cleanup_data): } # /prod/kafka2 - get_browse_paths_json = { - "query": get_browse_paths_query, - "variables": { - "input": { - "type": "DATASET", - "path": ["prod", "kafka2"], - "start": 0, - "count": 10, - } - }, + variables = { + "input": { + "type": "DATASET", + "path": ["prod", 
"kafka2"], + "start": 0, + "count": 10, + } } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=get_browse_paths_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["browse"] is not None - assert "errors" not in res_data + res_data = execute_graphql(auth_session, query, variables) browse = res_data["data"]["browse"] assert browse == { diff --git a/smoke-test/tests/containers/containers_test.py b/smoke-test/tests/containers/containers_test.py index 3d1217081fa6..01b04f306f54 100644 --- a/smoke-test/tests/containers/containers_test.py +++ b/smoke-test/tests/containers/containers_test.py @@ -1,6 +1,8 @@ +from typing import Any, Dict + import pytest -from tests.utils import delete_urns_from_file, ingest_file_via_rest +from tests.utils import delete_urns_from_file, execute_graphql, ingest_file_via_rest @pytest.fixture(scope="module", autouse=False) @@ -20,87 +22,80 @@ def test_get_full_container(auth_session, ingest_cleanup_data): editable_container_description = "custom description" # Get a full container - get_container_json = { - "query": """query container($urn: String!) 
{\n - container(urn: $urn) {\n - urn\n - type\n - platform {\n - urn\n - properties{\n - displayName\n - }\n - }\n - container {\n - urn\n - properties {\n - name\n - description\n - }\n - }\n - properties {\n - name\n - description\n - }\n - editableProperties {\n - description\n - }\n - ownership {\n - owners {\n - owner {\n - ...on CorpUser {\n - urn\n - }\n - }\n - }\n - }\n - institutionalMemory {\n - elements {\n - url\n - }\n - }\n - tags {\n - tags {\n - tag {\n - urn\n - }\n - }\n - }\n - glossaryTerms {\n - terms {\n - term {\n - urn\n - }\n - }\n - }\n - subTypes {\n - typeNames\n - }\n - entities(input: {}) {\n - total\n - searchResults {\n - entity {\n - ...on Dataset {\n - urn\n - }\n - }\n - }\n - }\n - }\n - }""", - "variables": {"urn": container_urn}, - } + get_container_query = """query container($urn: String!) { + container(urn: $urn) { + urn + type + platform { + urn + properties{ + displayName + } + } + container { + urn + properties { + name + description + } + } + properties { + name + description + } + editableProperties { + description + } + ownership { + owners { + owner { + ...on CorpUser { + urn + } + } + } + } + institutionalMemory { + elements { + url + } + } + tags { + tags { + tag { + urn + } + } + } + glossaryTerms { + terms { + term { + urn + } + } + } + subTypes { + typeNames + } + entities(input: {}) { + total + searchResults { + entity { + ...on Dataset { + urn + } + } + } + } + } + }""" + get_container_variables: Dict[str, Any] = {"urn": container_urn} - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=get_container_json + res_data = execute_graphql( + auth_session, get_container_query, get_container_variables ) - response.raise_for_status() - res_data = response.json() - assert res_data - assert res_data["data"] assert res_data["data"]["container"] is not None - assert "errors" not in res_data container = res_data["data"]["container"] assert container["urn"] == container_urn @@ -123,31 
+118,22 @@ def test_get_parent_container(auth_session): dataset_urn = "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)" # Get count of existing secrets - get_dataset_json = { - "query": """query dataset($urn: String!) {\n - dataset(urn: $urn) {\n - urn\n - container {\n - urn\n - properties {\n - name\n - }\n - }\n - }\n - }""", - "variables": {"urn": dataset_urn}, - } + get_dataset_query = """query dataset($urn: String!) { + dataset(urn: $urn) { + urn + container { + urn + properties { + name + } + } + } + }""" + get_dataset_variables: Dict[str, Any] = {"urn": dataset_urn} - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=get_dataset_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, get_dataset_query, get_dataset_variables) - assert res_data - assert res_data["data"] assert res_data["data"]["dataset"] is not None - assert "errors" not in res_data dataset = res_data["data"]["dataset"] assert dataset["container"]["properties"]["name"] == "datahub_schema" @@ -159,178 +145,133 @@ def test_update_container(auth_session): new_tag = "urn:li:tag:Test" - add_tag_json = { - "query": """mutation addTag($input: TagAssociationInput!) {\n + add_tag_query = """mutation addTag($input: TagAssociationInput!) 
{ addTag(input: $input) - }""", - "variables": { - "input": { - "tagUrn": new_tag, - "resourceUrn": container_urn, - } - }, + }""" + add_tag_variables: Dict[str, Any] = { + "input": { + "tagUrn": new_tag, + "resourceUrn": container_urn, + } } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=add_tag_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, add_tag_query, add_tag_variables) - assert res_data - assert res_data["data"] assert res_data["data"]["addTag"] is True new_term = "urn:li:glossaryTerm:Term" - add_term_json = { - "query": """mutation addTerm($input: TermAssociationInput!) {\n + add_term_query = """mutation addTerm($input: TermAssociationInput!) { addTerm(input: $input) - }""", - "variables": { - "input": { - "termUrn": new_term, - "resourceUrn": container_urn, - } - }, + }""" + add_term_variables: Dict[str, Any] = { + "input": { + "termUrn": new_term, + "resourceUrn": container_urn, + } } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=add_term_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, add_term_query, add_term_variables) - assert res_data - assert res_data["data"] assert res_data["data"]["addTerm"] is True new_owner = "urn:li:corpuser:jdoe" - add_owner_json = { - "query": """mutation addOwner($input: AddOwnerInput!) {\n + add_owner_query = """mutation addOwner($input: AddOwnerInput!) 
{ addOwner(input: $input) - }""", - "variables": { - "input": { - "ownerUrn": new_owner, - "resourceUrn": container_urn, - "ownerEntityType": "CORP_USER", - "ownershipTypeUrn": "urn:li:ownershipType:__system__technical_owner", - } - }, + }""" + add_owner_variables: Dict[str, Any] = { + "input": { + "ownerUrn": new_owner, + "resourceUrn": container_urn, + "ownerEntityType": "CORP_USER", + "ownershipTypeUrn": "urn:li:ownershipType:__system__technical_owner", + } } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=add_owner_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, add_owner_query, add_owner_variables) - assert res_data - assert res_data["data"] assert res_data["data"]["addOwner"] is True new_link = "https://www.test.com" - add_link_json = { - "query": """mutation addLink($input: AddLinkInput!) {\n + add_link_query = """mutation addLink($input: AddLinkInput!) { addLink(input: $input) - }""", - "variables": { - "input": { - "linkUrl": new_link, - "resourceUrn": container_urn, - "label": "Label", - } - }, + }""" + add_link_variables: Dict[str, Any] = { + "input": { + "linkUrl": new_link, + "resourceUrn": container_urn, + "label": "Label", + } } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=add_link_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, add_link_query, add_link_variables) - assert res_data - assert res_data["data"] assert res_data["data"]["addLink"] is True new_description = "New description" - update_description_json = { - "query": """mutation updateDescription($input: DescriptionUpdateInput!) {\n + update_description_query = """mutation updateDescription($input: DescriptionUpdateInput!) 
{ updateDescription(input: $input) - }""", - "variables": { - "input": { - "description": new_description, - "resourceUrn": container_urn, - } - }, + }""" + update_description_variables: Dict[str, Any] = { + "input": { + "description": new_description, + "resourceUrn": container_urn, + } } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=update_description_json + res_data = execute_graphql( + auth_session, update_description_query, update_description_variables ) - response.raise_for_status() - res_data = response.json() - assert res_data - assert res_data["data"] assert res_data["data"]["updateDescription"] is True # Now fetch the container to ensure it was updated # Get the container - get_container_json = { - "query": """query container($urn: String!) {\n - container(urn: $urn) {\n - editableProperties {\n - description\n - }\n - ownership {\n - owners {\n - owner {\n - ...on CorpUser {\n - urn\n - }\n - }\n - }\n - }\n - institutionalMemory {\n - elements {\n - url\n - }\n - }\n - tags {\n - tags {\n - tag {\n - urn\n - }\n - }\n - }\n - glossaryTerms {\n - terms {\n - term {\n - urn\n - }\n - }\n - }\n - }\n - }""", - "variables": {"urn": container_urn}, - } + get_container_query = """query container($urn: String!) 
{ + container(urn: $urn) { + editableProperties { + description + } + ownership { + owners { + owner { + ...on CorpUser { + urn + } + } + } + } + institutionalMemory { + elements { + url + } + } + tags { + tags { + tag { + urn + } + } + } + glossaryTerms { + terms { + term { + urn + } + } + } + } + }""" + get_container_variables: Dict[str, Any] = {"urn": container_urn} - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=get_container_json + res_data = execute_graphql( + auth_session, get_container_query, get_container_variables ) - response.raise_for_status() - res_data = response.json() - assert res_data - assert res_data["data"] assert res_data["data"]["container"] is not None - assert "errors" not in res_data container = res_data["data"]["container"] assert container["editableProperties"]["description"] == new_description diff --git a/smoke-test/tests/data_process_instance/test_data_process_instance.py b/smoke-test/tests/data_process_instance/test_data_process_instance.py index c0d2c62ba404..e5a0fd9a3174 100644 --- a/smoke-test/tests/data_process_instance/test_data_process_instance.py +++ b/smoke-test/tests/data_process_instance/test_data_process_instance.py @@ -26,6 +26,7 @@ ) from tests.utils import ( delete_urns_from_file, + execute_graphql, ingest_file_via_rest, wait_for_writes_to_sync, ) @@ -190,8 +191,7 @@ def ingest_cleanup_data(auth_session, graph_client, request): def test_search_dpi(auth_session, ingest_cleanup_data): """Test DPI search and validation of returned fields using GraphQL.""" - json = { - "query": """query scrollAcrossEntities($input: ScrollAcrossEntitiesInput!) { + query = """query scrollAcrossEntities($input: ScrollAcrossEntitiesInput!) 
{ scrollAcrossEntities(input: $input) { nextScrollId count @@ -232,21 +232,14 @@ def test_search_dpi(auth_session, ingest_cleanup_data): } } } - }""", - "variables": { - "input": {"types": ["DATA_PROCESS_INSTANCE"], "query": dpi_id, "count": 10} - }, + }""" + variables = { + "input": {"types": ["DATA_PROCESS_INSTANCE"], "query": dpi_id, "count": 10} } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, query, variables) # Basic response structure validation - assert res_data, "Response should not be empty" - assert "data" in res_data, "Response should contain 'data' field" print("RESPONSE DATA:" + str(res_data)) assert "scrollAcrossEntities" in res_data["data"], ( "Response should contain 'scrollAcrossEntities' field" diff --git a/smoke-test/tests/deprecation/deprecation_test.py b/smoke-test/tests/deprecation/deprecation_test.py index 0377ddd05c38..a2a37a6dee38 100644 --- a/smoke-test/tests/deprecation/deprecation_test.py +++ b/smoke-test/tests/deprecation/deprecation_test.py @@ -1,6 +1,13 @@ +from typing import Any, Dict + import pytest -from tests.utils import delete_urns_from_file, get_root_urn, ingest_file_via_rest +from tests.utils import ( + delete_urns_from_file, + execute_graphql, + get_root_urn, + ingest_file_via_rest, +) @pytest.fixture(scope="module", autouse=True) @@ -18,8 +25,7 @@ def test_update_deprecation_all_fields(auth_session): "urn:li:dataset:(urn:li:dataPlatform:kafka,test-tags-terms-sample-kafka,PROD)" ) - dataset_json = { - "query": """query getDataset($urn: String!) {\n + query = """query getDataset($urn: String!) 
{\n dataset(urn: $urn) {\n deprecation {\n deprecated\n @@ -28,56 +34,33 @@ def test_update_deprecation_all_fields(auth_session): actor\n }\n }\n - }""", - "variables": {"urn": dataset_urn}, - } + }""" + variables: Dict[str, Any] = {"urn": dataset_urn} # Fetch tags - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, query, variables) - assert res_data - assert res_data["data"] - assert res_data["data"]["dataset"] assert res_data["data"]["dataset"]["deprecation"] is None - update_deprecation_json = { - "query": """mutation updateDeprecation($input: UpdateDeprecationInput!) {\n + update_query = """mutation updateDeprecation($input: UpdateDeprecationInput!) {\n updateDeprecation(input: $input) - }""", - "variables": { - "input": { - "urn": dataset_urn, - "deprecated": True, - "note": "My test note", - "decommissionTime": 0, - } - }, + }""" + update_variables: Dict[str, Any] = { + "input": { + "urn": dataset_urn, + "deprecated": True, + "note": "My test note", + "decommissionTime": 0, + } } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=update_deprecation_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, update_query, update_variables) - assert res_data - assert res_data["data"] assert res_data["data"]["updateDeprecation"] is True # Refetch the dataset - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, query, variables) - assert res_data - assert res_data["data"] - assert res_data["data"]["dataset"] assert res_data["data"]["dataset"]["deprecation"] == { "deprecated": True, "decommissionTime": 0, @@ -92,26 +75,19 @@ def 
test_update_deprecation_partial_fields(auth_session, ingest_cleanup_data): "urn:li:dataset:(urn:li:dataPlatform:kafka,test-tags-terms-sample-kafka,PROD)" ) - update_deprecation_json = { - "query": """mutation updateDeprecation($input: UpdateDeprecationInput!) {\n + update_query = """mutation updateDeprecation($input: UpdateDeprecationInput!) {\n updateDeprecation(input: $input) - }""", - "variables": {"input": {"urn": dataset_urn, "deprecated": False}}, + }""" + update_variables: Dict[str, Any] = { + "input": {"urn": dataset_urn, "deprecated": False} } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=update_deprecation_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, update_query, update_variables) - assert res_data - assert res_data["data"] assert res_data["data"]["updateDeprecation"] is True # Refetch the dataset - dataset_json = { - "query": """query getDataset($urn: String!) {\n + query = """query getDataset($urn: String!) 
{\n dataset(urn: $urn) {\n deprecation {\n deprecated\n @@ -120,19 +96,11 @@ def test_update_deprecation_partial_fields(auth_session, ingest_cleanup_data): actor\n }\n }\n - }""", - "variables": {"urn": dataset_urn}, - } + }""" + variables: Dict[str, Any] = {"urn": dataset_urn} - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, query, variables) - assert res_data - assert res_data["data"] - assert res_data["data"]["dataset"] assert res_data["data"]["dataset"]["deprecation"] == { "deprecated": False, "note": "", diff --git a/smoke-test/tests/domains/domains_test.py b/smoke-test/tests/domains/domains_test.py index 1f8df84ac16a..15179273c916 100644 --- a/smoke-test/tests/domains/domains_test.py +++ b/smoke-test/tests/domains/domains_test.py @@ -1,7 +1,15 @@ +from typing import Any, Dict + import pytest import tenacity -from tests.utils import delete_urns_from_file, get_sleep_info, ingest_file_via_rest +from tests.utils import ( + delete_entity, + delete_urns_from_file, + execute_graphql, + get_sleep_info, + ingest_file_via_rest, +) sleep_sec, sleep_times = get_sleep_info() @@ -18,18 +26,13 @@ def ingest_cleanup_data(auth_session, graph_client, request): @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _ensure_more_domains(auth_session, list_domains_json, before_count): +def _ensure_more_domains( + auth_session, query: str, variables: Dict[str, Any], before_count: int +) -> None: # Get new count of Domains - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=list_domains_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, query, variables) - assert res_data - assert res_data["data"] assert res_data["data"]["listDomains"]["total"] is not None - assert "errors" not in 
res_data # Assert that there are more domains now. after_count = res_data["data"]["listDomains"]["total"] @@ -40,39 +43,27 @@ def _ensure_more_domains(auth_session, list_domains_json, before_count): @pytest.mark.dependency() def test_create_list_get_domain(auth_session): # Setup: Delete the domain (if exists) - response = auth_session.post( - f"{auth_session.gms_url()}/entities?action=delete", - json={"urn": "urn:li:domain:test id"}, - ) + delete_entity(auth_session, "urn:li:domain:test id") # Get count of existing secrets - list_domains_json = { - "query": """query listDomains($input: ListDomainsInput!) {\n - listDomains(input: $input) {\n - start\n - count\n - total\n - domains {\n - urn\n - properties {\n - name\n - }\n - }\n - }\n - }""", - "variables": {"input": {"start": 0, "count": 20}}, - } + list_domains_query = """query listDomains($input: ListDomainsInput!) { + listDomains(input: $input) { + start + count + total + domains { + urn + properties { + name + } + } + } + }""" + list_domains_variables: Dict[str, Any] = {"input": {"start": 0, "count": 20}} - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=list_domains_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, list_domains_query, list_domains_variables) - assert res_data - assert res_data["data"] assert res_data["data"]["listDomains"]["total"] is not None - assert "errors" not in res_data print(f"domains resp is {res_data}") before_count = res_data["data"]["listDomains"]["total"] @@ -83,63 +74,48 @@ def test_create_list_get_domain(auth_session): domain_description = "test description" # Create new Domain - create_domain_json = { - "query": """mutation createDomain($input: CreateDomainInput!) {\n + create_domain_query = """mutation createDomain($input: CreateDomainInput!) 
{ createDomain(input: $input) - }""", - "variables": { - "input": { - "id": domain_id, - "name": domain_name, - "description": domain_description, - } - }, + }""" + create_domain_variables: Dict[str, Any] = { + "input": { + "id": domain_id, + "name": domain_name, + "description": domain_description, + } } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=create_domain_json + res_data = execute_graphql( + auth_session, create_domain_query, create_domain_variables ) - response.raise_for_status() - res_data = response.json() - assert res_data - assert res_data["data"] assert res_data["data"]["createDomain"] is not None - assert "errors" not in res_data domain_urn = res_data["data"]["createDomain"] _ensure_more_domains( auth_session=auth_session, - list_domains_json=list_domains_json, + query=list_domains_query, + variables=list_domains_variables, before_count=before_count, ) # Get the domain value back - get_domain_json = { - "query": """query domain($urn: String!) {\n - domain(urn: $urn) {\n - urn\n - id\n - properties {\n - name\n - description\n - }\n - }\n - }""", - "variables": {"urn": domain_urn}, - } + get_domain_query = """query domain($urn: String!) 
{ + domain(urn: $urn) { + urn + id + properties { + name + description + } + } + }""" + get_domain_variables: Dict[str, Any] = {"urn": domain_urn} - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=get_domain_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, get_domain_query, get_domain_variables) - assert res_data - assert res_data["data"] assert res_data["data"]["domain"] is not None - assert "errors" not in res_data domain = res_data["data"]["domain"] assert domain["urn"] == f"urn:li:domain:{domain_id}" @@ -147,14 +123,7 @@ def test_create_list_get_domain(auth_session): assert domain["properties"]["name"] == domain_name assert domain["properties"]["description"] == domain_description - delete_json = {"urn": domain_urn} - - # Cleanup: Delete the domain - response = auth_session.post( - f"{auth_session.gms_url()}/entities?action=delete", json=delete_json - ) - - response.raise_for_status() + delete_entity(auth_session, domain_urn) @pytest.mark.dependency(depends=["test_create_list_get_domain"]) @@ -166,66 +135,44 @@ def test_set_unset_domain(auth_session, ingest_cleanup_data): domain_urn = "urn:li:domain:engineering" # First unset to be sure. - unset_domain_json = { - "query": """mutation unsetDomain($entityUrn: String!) {\n - unsetDomain(entityUrn: $entityUrn)}""", - "variables": {"entityUrn": dataset_urn}, - } + unset_domain_query = """mutation unsetDomain($entityUrn: String!) 
{ + unsetDomain(entityUrn: $entityUrn)}""" + unset_domain_variables: Dict[str, Any] = {"entityUrn": dataset_urn} - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=unset_domain_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, unset_domain_query, unset_domain_variables) - assert res_data - assert res_data["data"] assert res_data["data"]["unsetDomain"] is True - assert "errors" not in res_data # Set a new domain - set_domain_json = { - "query": """mutation setDomain($entityUrn: String!, $domainUrn: String!) {\n - setDomain(entityUrn: $entityUrn, domainUrn: $domainUrn)}""", - "variables": {"entityUrn": dataset_urn, "domainUrn": domain_urn}, + set_domain_query = """mutation setDomain($entityUrn: String!, $domainUrn: String!) { + setDomain(entityUrn: $entityUrn, domainUrn: $domainUrn)}""" + set_domain_variables: Dict[str, Any] = { + "entityUrn": dataset_urn, + "domainUrn": domain_urn, } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=set_domain_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, set_domain_query, set_domain_variables) - assert res_data - assert res_data["data"] assert res_data["data"]["setDomain"] is True - assert "errors" not in res_data # Now, fetch the dataset's domain and confirm it was set. - get_dataset_json = { - "query": """query dataset($urn: String!) {\n - dataset(urn: $urn) {\n - urn\n - domain {\n - domain {\n - urn\n - properties{\n - name\n - }\n - }\n - }\n - }\n - }""", - "variables": {"urn": dataset_urn}, - } + get_dataset_query = """query dataset($urn: String!) 
{ + dataset(urn: $urn) { + urn + domain { + domain { + urn + properties{ + name + } + } + } + } + }""" + get_dataset_variables: Dict[str, Any] = {"urn": dataset_urn} - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=get_dataset_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, get_dataset_query, get_dataset_variables) - assert res_data assert res_data["data"]["dataset"]["domain"]["domain"]["urn"] == domain_urn assert ( res_data["data"]["dataset"]["domain"]["domain"]["properties"]["name"] diff --git a/smoke-test/tests/incidents/incidents_test.py b/smoke-test/tests/incidents/incidents_test.py index d7221c0c1b35..3d8803b4c087 100644 --- a/smoke-test/tests/incidents/incidents_test.py +++ b/smoke-test/tests/incidents/incidents_test.py @@ -1,8 +1,14 @@ import time +from typing import Any, Dict import pytest -from tests.utils import delete_urns_from_file, ingest_file_via_rest +from tests.utils import ( + delete_entity, + delete_urns_from_file, + execute_graphql, + ingest_file_via_rest, +) @pytest.fixture(scope="module", autouse=True) @@ -25,62 +31,54 @@ def test_list_dataset_incidents(auth_session): # Sleep for eventual consistency (not ideal) time.sleep(2) - list_dataset_incidents_json = { - "query": """query dataset($urn: String!) {\n - dataset(urn: $urn) {\n - incidents(state: ACTIVE, start: 0, count: 10) {\n - start\n - count\n - total\n - incidents {\n - urn\n - type\n - incidentType\n - title\n - description\n - incidentStatus {\n - state\n - message\n - lastUpdated {\n - time\n - actor\n - }\n - }\n - source {\n - type\n - source {\n - ... on Assertion {\n - urn\n - info {\n + list_dataset_incidents_query = """query dataset($urn: String!) 
{ + dataset(urn: $urn) { + incidents(state: ACTIVE, start: 0, count: 10) { + start + count + total + incidents { + urn + type + incidentType + title + description + incidentStatus { + state + message + lastUpdated { + time + actor + } + } + source { + type + source { + ... on Assertion { + urn + info { type - }\n - }\n - }\n - }\n - entity {\n - urn\n - }\n - created {\n - time\n - actor\n - }\n - }\n - }\n - }\n - }""", - "variables": {"urn": TEST_DATASET_URN}, - } + } + } + } + } + entity { + urn + } + created { + time + actor + } + } + } + } + }""" + list_dataset_incidents_variables: Dict[str, Any] = {"urn": TEST_DATASET_URN} - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", - json=list_dataset_incidents_json, + res_data = execute_graphql( + auth_session, list_dataset_incidents_query, list_dataset_incidents_variables ) - response.raise_for_status() - res_data = response.json() - assert res_data - assert "errors" not in res_data - assert res_data["data"] assert res_data["data"]["dataset"]["incidents"] == { "start": 0, "count": 10, @@ -111,110 +109,88 @@ def test_list_dataset_incidents(auth_session): @pytest.mark.dependency(depends=["test_list_dataset_incidents"]) def test_raise_resolve_incident(auth_session): # Raise new incident - raise_incident_json = { - "query": """mutation raiseIncident($input: RaiseIncidentInput!) {\n + raise_incident_query = """mutation raiseIncident($input: RaiseIncidentInput!) 
{ raiseIncident(input: $input) - }""", - "variables": { - "input": { - "type": "OPERATIONAL", - "title": "test title 2", - "description": "test description 2", - "resourceUrn": TEST_DATASET_URN, - "priority": "CRITICAL", - } - }, + }""" + raise_incident_variables: Dict[str, Any] = { + "input": { + "type": "OPERATIONAL", + "title": "test title 2", + "description": "test description 2", + "resourceUrn": TEST_DATASET_URN, + "priority": "CRITICAL", + } } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=raise_incident_json + res_data = execute_graphql( + auth_session, raise_incident_query, raise_incident_variables ) - response.raise_for_status() - res_data = response.json() - assert res_data - assert "errors" not in res_data - assert res_data["data"] assert res_data["data"]["raiseIncident"] is not None new_incident_urn = res_data["data"]["raiseIncident"] # Resolve the incident. - update_incident_status = { - "query": """mutation updateIncidentStatus($urn: String!, $input: IncidentStatusInput!) {\n + update_incident_status_query = """mutation updateIncidentStatus($urn: String!, $input: IncidentStatusInput!) 
{ updateIncidentStatus(urn: $urn, input: $input) - }""", - "variables": { - "urn": new_incident_urn, - "input": { - "state": "RESOLVED", - "message": "test message 2", - }, + }""" + update_incident_status_variables: Dict[str, Any] = { + "urn": new_incident_urn, + "input": { + "state": "RESOLVED", + "message": "test message 2", }, } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=update_incident_status + res_data = execute_graphql( + auth_session, update_incident_status_query, update_incident_status_variables ) - response.raise_for_status() - res_data = response.json() - assert res_data - assert "errors" not in res_data - assert res_data["data"] assert res_data["data"]["updateIncidentStatus"] is True # Sleep for eventual consistency (not ideal) time.sleep(2) # Fetch the dataset's incidents to confirm there's a resolved incident.new_incident_urn - list_dataset_incidents_json = { - "query": """query dataset($urn: String!) {\n - dataset(urn: $urn) {\n - incidents(state: RESOLVED, start: 0, count: 10) {\n - start\n - count\n - total\n - incidents {\n - urn\n - type\n - incidentType\n - title\n - description\n - priority\n - incidentStatus {\n - state\n - message\n - lastUpdated {\n - time\n - actor\n - }\n - }\n - entity {\n - urn\n - }\n - created {\n - time\n - actor\n - }\n - }\n - }\n - }\n - }""", - "variables": {"urn": TEST_DATASET_URN}, - } + list_resolved_incidents_query = """query dataset($urn: String!) 
{ + dataset(urn: $urn) { + incidents(state: RESOLVED, start: 0, count: 10) { + start + count + total + incidents { + urn + type + incidentType + title + description + priority + incidentStatus { + state + message + lastUpdated { + time + actor + } + } + entity { + urn + } + created { + time + actor + } + } + } + } + }""" + list_resolved_incidents_variables: Dict[str, Any] = {"urn": TEST_DATASET_URN} - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", - json=list_dataset_incidents_json, + res_data = execute_graphql( + auth_session, list_resolved_incidents_query, list_resolved_incidents_variables ) - response.raise_for_status() - res_data = response.json() - assert res_data - assert res_data["data"] assert res_data["data"]["dataset"]["incidents"]["total"] is not None - assert "errors" not in res_data # Find the new incident and do the comparison. active_incidents = res_data["data"]["dataset"]["incidents"]["incidents"] @@ -228,11 +204,5 @@ def test_raise_resolve_incident(auth_session): assert new_incident["incidentStatus"]["state"] == "RESOLVED" assert new_incident["priority"] == "CRITICAL" - delete_json = {"urn": new_incident_urn} - # Cleanup: Delete the incident - response = auth_session.post( - f"{auth_session.gms_url()}/entities?action=delete", json=delete_json - ) - - response.raise_for_status() + delete_entity(auth_session, new_incident_urn) diff --git a/smoke-test/tests/institutional_memory/institutional_memory_test.py b/smoke-test/tests/institutional_memory/institutional_memory_test.py index 23641d5f5481..d9a6a114f7ce 100644 --- a/smoke-test/tests/institutional_memory/institutional_memory_test.py +++ b/smoke-test/tests/institutional_memory/institutional_memory_test.py @@ -1,6 +1,11 @@ import pytest -from tests.utils import delete_urns_from_file, get_admin_username, ingest_file_via_rest +from tests.utils import ( + delete_urns_from_file, + execute_graphql, + get_admin_username, + ingest_file_via_rest, +) 
@pytest.fixture(scope="function", autouse=True) @@ -60,19 +65,8 @@ def ingest_cleanup_data(auth_session, graph_client, request): ADMIN_USERNAME = get_admin_username() -def execute_query(auth_session, query, variables): - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", - json={"query": query, "variables": variables}, - ) - response.raise_for_status() - res_data = response.json() - - return res_data - - def test_get_institutional_memory(auth_session): - res_data = execute_query(auth_session, QUERY_LIST, {"urn": TEST_DATASET_URN}) + res_data = execute_graphql(auth_session, QUERY_LIST, {"urn": TEST_DATASET_URN}) assert res_data assert "errors" not in res_data @@ -94,7 +88,7 @@ def test_get_institutional_memory(auth_session): def test_add_institutional_memory(auth_session): - res_data = execute_query( + res_data = execute_graphql( auth_session, MUTATION_ADD, { @@ -112,7 +106,7 @@ def test_add_institutional_memory(auth_session): assert res_data["data"] assert res_data["data"]["addLink"] - res_data = execute_query(auth_session, QUERY_LIST, {"urn": TEST_DATASET_URN}) + res_data = execute_graphql(auth_session, QUERY_LIST, {"urn": TEST_DATASET_URN}) assert res_data assert "errors" not in res_data @@ -147,7 +141,7 @@ def test_add_institutional_memory(auth_session): def test_update_institutional_memory(auth_session): - res_data = execute_query( + res_data = execute_graphql( auth_session, MUTATION_UPDATE, { @@ -167,7 +161,7 @@ def test_update_institutional_memory(auth_session): assert res_data["data"] assert res_data["data"]["updateLink"] - res_data = execute_query(auth_session, QUERY_LIST, {"urn": TEST_DATASET_URN}) + res_data = execute_graphql(auth_session, QUERY_LIST, {"urn": TEST_DATASET_URN}) assert res_data assert "errors" not in res_data @@ -195,7 +189,7 @@ def test_update_institutional_memory(auth_session): def test_remove_institutional_memory(auth_session): - res_data = execute_query( + res_data = execute_graphql( auth_session, 
MUTATION_REMOVE, { @@ -212,7 +206,7 @@ def test_remove_institutional_memory(auth_session): assert res_data["data"] assert res_data["data"]["removeLink"] - res_data = execute_query(auth_session, QUERY_LIST, {"urn": TEST_DATASET_URN}) + res_data = execute_graphql(auth_session, QUERY_LIST, {"urn": TEST_DATASET_URN}) assert res_data assert "errors" not in res_data @@ -221,7 +215,7 @@ def test_remove_institutional_memory(auth_session): def test_upsert_institutional_memory(auth_session): - res_data = execute_query( + res_data = execute_graphql( auth_session, MUTATION_UPSERT, { @@ -241,7 +235,7 @@ def test_upsert_institutional_memory(auth_session): assert res_data["data"] assert res_data["data"]["upsertLink"] - res_data = execute_query( + res_data = execute_graphql( auth_session, MUTATION_UPSERT, { @@ -261,7 +255,7 @@ def test_upsert_institutional_memory(auth_session): assert res_data["data"] assert res_data["data"]["upsertLink"] - res_data = execute_query(auth_session, QUERY_LIST, {"urn": TEST_DATASET_URN}) + res_data = execute_graphql(auth_session, QUERY_LIST, {"urn": TEST_DATASET_URN}) assert res_data assert "errors" not in res_data diff --git a/smoke-test/tests/managed_ingestion/managed_ingestion_test.py b/smoke-test/tests/managed_ingestion/managed_ingestion_test.py index ee949bc7c4b1..4da55d17f7b6 100644 --- a/smoke-test/tests/managed_ingestion/managed_ingestion_test.py +++ b/smoke-test/tests/managed_ingestion/managed_ingestion_test.py @@ -1,16 +1,16 @@ import json +from typing import Any, Dict import pytest import tenacity -from tests.utils import get_sleep_info +from tests.utils import execute_graphql, get_sleep_info sleep_sec, sleep_times = get_sleep_info() def _get_ingestionSources(auth_session): - json_q = { - "query": """query listIngestionSources($input: ListIngestionSourcesInput!) {\n + query = """query listIngestionSources($input: ListIngestionSourcesInput!) 
{\n listIngestionSources(input: $input) {\n start\n count\n @@ -19,20 +19,10 @@ def _get_ingestionSources(auth_session): urn\n }\n }\n - }""", - "variables": {"input": {"start": 0, "count": 20}}, - } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + }""" + variables: Dict[str, Any] = {"input": {"start": 0, "count": 20}} + res_data = execute_graphql(auth_session, query, variables) assert res_data["data"]["listIngestionSources"]["total"] is not None - assert "errors" not in res_data return res_data @@ -50,8 +40,7 @@ def _ensure_ingestion_source_count(auth_session, expected_count): stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) def _ensure_secret_increased(auth_session, before_count): - json_q = { - "query": """query listSecrets($input: ListSecretsInput!) {\n + query = """query listSecrets($input: ListSecretsInput!) {\n listSecrets(input: $input) {\n start\n count\n @@ -61,20 +50,10 @@ def _ensure_secret_increased(auth_session, before_count): name\n }\n }\n - }""", - "variables": {"input": {"start": 0, "count": 20}}, - } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + }""" + variables: Dict[str, Any] = {"input": {"start": 0, "count": 20}} + res_data = execute_graphql(auth_session, query, variables) assert res_data["data"]["listSecrets"]["total"] is not None - assert "errors" not in res_data # Assert that there are more secrets now. after_count = res_data["data"]["listSecrets"]["total"] @@ -86,26 +65,15 @@ def _ensure_secret_increased(auth_session, before_count): ) def _ensure_secret_not_present(auth_session): # Get the secret value back - json_q = { - "query": """query getSecretValues($input: GetSecretValuesInput!) 
{\n + query = """query getSecretValues($input: GetSecretValuesInput!) {\n getSecretValues(input: $input) {\n name\n value\n }\n - }""", - "variables": {"input": {"secrets": ["SMOKE_TEST"]}}, - } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + }""" + variables: Dict[str, Any] = {"input": {"secrets": ["SMOKE_TEST"]}} + res_data = execute_graphql(auth_session, query, variables) assert res_data["data"]["getSecretValues"] is not None - assert "errors" not in res_data secret_values = res_data["data"]["getSecretValues"] secret_value_arr = [x for x in secret_values if x["name"] == "SMOKE_TEST"] @@ -118,8 +86,7 @@ def _ensure_secret_not_present(auth_session): def _ensure_ingestion_source_present( auth_session, ingestion_source_urn, num_execs=None ): - json_q = { - "query": """query ingestionSource($urn: String!) {\n + query = """query ingestionSource($urn: String!) 
{\n ingestionSource(urn: $urn) {\n executions(start: 0, count: 1) {\n start\n @@ -130,21 +97,12 @@ def _ensure_ingestion_source_present( }\n }\n }\n - }""", - "variables": {"urn": ingestion_source_urn}, - } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q - ) - response.raise_for_status() - res_data = response.json() + }""" + variables: Dict[str, Any] = {"urn": ingestion_source_urn} + res_data = execute_graphql(auth_session, query, variables) print(res_data) - assert res_data - assert res_data["data"] assert res_data["data"]["ingestionSource"] is not None - assert "errors" not in res_data if num_execs is not None: ingestion_source = res_data["data"]["ingestionSource"] @@ -157,8 +115,7 @@ def _ensure_ingestion_source_present( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) def _ensure_execution_request_present(auth_session, execution_request_urn): - json_q = { - "query": """query executionRequest($urn: String!) {\n + query = """query executionRequest($urn: String!) {\n executionRequest(urn: $urn) {\n urn\n input {\n @@ -174,27 +131,16 @@ def _ensure_execution_request_present(auth_session, execution_request_urn): durationMs\n }\n }\n - }""", - "variables": {"urn": execution_request_urn}, - } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + }""" + variables: Dict[str, Any] = {"urn": execution_request_urn} + res_data = execute_graphql(auth_session, query, variables) assert res_data["data"]["executionRequest"] is not None - assert "errors" not in res_data return res_data def test_create_list_get_remove_secret(auth_session): # Get count of existing secrets - json_q = { - "query": """query listSecrets($input: ListSecretsInput!) {\n + query = """query listSecrets($input: ListSecretsInput!) 
{\n listSecrets(input: $input) {\n start\n count\n @@ -204,41 +150,20 @@ def test_create_list_get_remove_secret(auth_session): name\n }\n }\n - }""", - "variables": {"input": {"start": 0, "count": 20}}, - } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + }""" + variables: Dict[str, Any] = {"input": {"start": 0, "count": 20}} + res_data = execute_graphql(auth_session, query, variables) assert res_data["data"]["listSecrets"]["total"] is not None - assert "errors" not in res_data before_count = res_data["data"]["listSecrets"]["total"] # Create new secret - json_q = { - "query": """mutation createSecret($input: CreateSecretInput!) {\n + query = """mutation createSecret($input: CreateSecretInput!) {\n createSecret(input: $input) - }""", - "variables": {"input": {"name": "SMOKE_TEST", "value": "mytestvalue"}}, - } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + }""" + variables = {"input": {"name": "SMOKE_TEST", "value": "mytestvalue"}} + res_data = execute_graphql(auth_session, query, variables) assert res_data["data"]["createSecret"] is not None - assert "errors" not in res_data secret_urn = res_data["data"]["createSecret"] @@ -246,77 +171,45 @@ def test_create_list_get_remove_secret(auth_session): _ensure_secret_increased(auth_session, before_count) # Update existing secret - json_q = { - "query": """mutation updateSecret($input: UpdateSecretInput!) {\n + query = """mutation updateSecret($input: UpdateSecretInput!) 
{\n updateSecret(input: $input) - }""", - "variables": { - "input": { - "urn": secret_urn, - "name": "SMOKE_TEST", - "value": "mytestvalue.updated", - } - }, + }""" + variables = { + "input": { + "urn": secret_urn, + "name": "SMOKE_TEST", + "value": "mytestvalue.updated", + } } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + res_data = execute_graphql(auth_session, query, variables) assert res_data["data"]["updateSecret"] is not None - assert "errors" not in res_data secret_urn = res_data["data"]["updateSecret"] # Get the secret value back - json_q = { - "query": """query getSecretValues($input: GetSecretValuesInput!) {\n + query = """query getSecretValues($input: GetSecretValuesInput!) {\n getSecretValues(input: $input) {\n name\n value\n }\n - }""", - "variables": {"input": {"secrets": ["SMOKE_TEST"]}}, - } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q - ) - response.raise_for_status() - res_data = response.json() + }""" + variables = {"input": {"secrets": ["SMOKE_TEST"]}} + res_data = execute_graphql(auth_session, query, variables) print(res_data) - assert res_data - assert res_data["data"] assert res_data["data"]["getSecretValues"] is not None - assert "errors" not in res_data secret_values = res_data["data"]["getSecretValues"] secret_value = [x for x in secret_values if x["name"] == "SMOKE_TEST"][0] assert secret_value["value"] == "mytestvalue.updated" # Now cleanup and remove the secret - json_q = { - "query": """mutation deleteSecret($urn: String!) {\n + query = """mutation deleteSecret($urn: String!) 
{\n deleteSecret(urn: $urn) - }""", - "variables": {"urn": secret_urn}, - } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + }""" + variables = {"urn": secret_urn} + res_data = execute_graphql(auth_session, query, variables) assert res_data["data"]["deleteSecret"] is not None - assert "errors" not in res_data # Re-fetch the secret values and see that they are not there. _ensure_secret_not_present(auth_session) @@ -330,35 +223,24 @@ def test_create_list_get_remove_ingestion_source(auth_session): before_count = res_data["data"]["listIngestionSources"]["total"] # Create new ingestion source - json_q = { - "query": """mutation createIngestionSource($input: UpdateIngestionSourceInput!) {\n + query = """mutation createIngestionSource($input: UpdateIngestionSourceInput!) {\n createIngestionSource(input: $input) - }""", - "variables": { - "input": { - "name": "My Test Ingestion Source", - "type": "mysql", - "description": "My ingestion source description", - "schedule": {"interval": "*/60 * * * *", "timezone": "UTC"}, - "config": { - "recipe": '{"source":{"type":"mysql","config":{"include_tables":true,"database":null,"password":"${MYSQL_PASSWORD}","profiling":{"enabled":false},"host_port":null,"include_views":true,"username":"${MYSQL_USERNAME}"}},"pipeline_name":"urn:li:dataHubIngestionSource:f38bd060-4ea8-459c-8f24-a773286a2927"}', - "version": "0.8.18", - "executorId": "mytestexecutor", - }, - } - }, + }""" + variables: Dict[str, Any] = { + "input": { + "name": "My Test Ingestion Source", + "type": "mysql", + "description": "My ingestion source description", + "schedule": {"interval": "*/60 * * * *", "timezone": "UTC"}, + "config": { + "recipe": 
'{"source":{"type":"mysql","config":{"include_tables":true,"database":null,"password":"${MYSQL_PASSWORD}","profiling":{"enabled":false},"host_port":null,"include_views":true,"username":"${MYSQL_USERNAME}"}},"pipeline_name":"urn:li:dataHubIngestionSource:f38bd060-4ea8-459c-8f24-a773286a2927"}', + "version": "0.8.18", + "executorId": "mytestexecutor", + }, + } } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + res_data = execute_graphql(auth_session, query, variables) assert res_data["data"]["createIngestionSource"] is not None - assert "errors" not in res_data ingestion_source_urn = res_data["data"]["createIngestionSource"] @@ -366,8 +248,7 @@ def test_create_list_get_remove_ingestion_source(auth_session): after_count = _ensure_ingestion_source_count(auth_session, before_count + 1) # Get the ingestion source back - json_q = { - "query": """query ingestionSource($urn: String!) {\n + query = """query ingestionSource($urn: String!) 
{\n ingestionSource(urn: $urn) {\n urn\n type\n @@ -382,20 +263,10 @@ def test_create_list_get_remove_ingestion_source(auth_session): version\n }\n }\n - }""", - "variables": {"urn": ingestion_source_urn}, - } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + }""" + variables = {"urn": ingestion_source_urn} + res_data = execute_graphql(auth_session, query, variables) assert res_data["data"]["ingestionSource"] is not None - assert "errors" not in res_data ingestion_source = res_data["data"]["ingestionSource"] assert ingestion_source["urn"] == ingestion_source_urn @@ -411,24 +282,13 @@ def test_create_list_get_remove_ingestion_source(auth_session): assert ingestion_source["config"]["version"] == "0.8.18" # Now cleanup and remove the ingestion source - json_q = { - "query": """mutation deleteIngestionSource($urn: String!) {\n + query = """mutation deleteIngestionSource($urn: String!) {\n deleteIngestionSource(urn: $urn) - }""", - "variables": {"urn": ingestion_source_urn}, - } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q - ) - response.raise_for_status() - res_data = response.json() - - assert res_data + }""" + variables = {"urn": ingestion_source_urn} + res_data = execute_graphql(auth_session, query, variables) print(res_data) - assert res_data["data"] assert res_data["data"]["deleteIngestionSource"] is not None - assert "errors" not in res_data # Ensure the ingestion source has been removed. _ensure_ingestion_source_count(auth_session, after_count - 1) @@ -441,58 +301,36 @@ def test_create_list_get_remove_ingestion_source(auth_session): ) def test_create_list_get_ingestion_execution_request(auth_session): # Create new ingestion source - json_q = { - "query": """mutation createIngestionSource($input: UpdateIngestionSourceInput!) 
{\n + query = """mutation createIngestionSource($input: UpdateIngestionSourceInput!) {\n createIngestionSource(input: $input) - }""", - "variables": { - "input": { - "name": "My Test Ingestion Source", - "type": "mysql", - "description": "My ingestion source description", - "schedule": {"interval": "*/5 * * * *", "timezone": "UTC"}, - "config": { - "recipe": '{"source":{"type":"mysql","config":{"include_tables":true,"database":null,"password":"${MYSQL_PASSWORD}","profiling":{"enabled":false},"host_port":null,"include_views":true,"username":"${MYSQL_USERNAME}"}},"pipeline_name":"urn:li:dataHubIngestionSource:f38bd060-4ea8-459c-8f24-a773286a2927"}', - "version": "0.8.18", - "executorId": "mytestexecutor", - }, - } - }, + }""" + variables: Dict[str, Any] = { + "input": { + "name": "My Test Ingestion Source", + "type": "mysql", + "description": "My ingestion source description", + "schedule": {"interval": "*/5 * * * *", "timezone": "UTC"}, + "config": { + "recipe": '{"source":{"type":"mysql","config":{"include_tables":true,"database":null,"password":"${MYSQL_PASSWORD}","profiling":{"enabled":false},"host_port":null,"include_views":true,"username":"${MYSQL_USERNAME}"}},"pipeline_name":"urn:li:dataHubIngestionSource:f38bd060-4ea8-459c-8f24-a773286a2927"}', + "version": "0.8.18", + "executorId": "mytestexecutor", + }, + } } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + res_data = execute_graphql(auth_session, query, variables) assert res_data["data"]["createIngestionSource"] is not None - assert "errors" not in res_data ingestion_source_urn = res_data["data"]["createIngestionSource"] # Create a request to execute the ingestion source - json_q = { - "query": """mutation createIngestionExecutionRequest($input: CreateIngestionExecutionRequestInput!) 
{\n + query = """mutation createIngestionExecutionRequest($input: CreateIngestionExecutionRequestInput!) {\n createIngestionExecutionRequest(input: $input) - }""", - "variables": {"input": {"ingestionSourceUrn": ingestion_source_urn}}, - } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + }""" + variables = {"input": {"ingestionSourceUrn": ingestion_source_urn}} + res_data = execute_graphql(auth_session, query, variables) assert res_data["data"]["createIngestionExecutionRequest"] is not None, ( f"res_data was {res_data}" ) - assert "errors" not in res_data execution_request_urn = res_data["data"]["createIngestionExecutionRequest"] @@ -532,20 +370,9 @@ def test_create_list_get_ingestion_execution_request(auth_session): assert execution_request["result"] is None # Now cleanup and remove the ingestion source - json_q = { - "query": """mutation deleteIngestionSource($urn: String!) {\n + query = """mutation deleteIngestionSource($urn: String!) 
{\n deleteIngestionSource(urn: $urn) - }""", - "variables": {"urn": ingestion_source_urn}, - } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + }""" + variables = {"urn": ingestion_source_urn} + res_data = execute_graphql(auth_session, query, variables) assert res_data["data"]["deleteIngestionSource"] is not None - assert "errors" not in res_data diff --git a/smoke-test/tests/policies/test_policies.py b/smoke-test/tests/policies/test_policies.py index aef99c08e422..89edf67db73a 100644 --- a/smoke-test/tests/policies/test_policies.py +++ b/smoke-test/tests/policies/test_policies.py @@ -1,7 +1,9 @@ +from typing import Any, Dict + import pytest import tenacity -from tests.utils import get_root_urn, get_sleep_info +from tests.utils import execute_graphql, get_root_urn, get_sleep_info TEST_POLICY_NAME = "Updated Platform Policy" @@ -70,85 +72,69 @@ def _ensure_policy_present(auth_session, new_urn): def test_frontend_policy_operations(auth_session): - json = { - "query": """mutation createPolicy($input: PolicyUpdateInput!) {\n - createPolicy(input: $input) }""", - "variables": { - "input": { - "type": "METADATA", - "name": "Test Metadata Policy", - "description": "My Metadaata Policy", - "state": "ACTIVE", - "resources": {"type": "dataset", "allResources": True}, - "privileges": ["EDIT_ENTITY_TAGS"], - "actors": { - "users": [get_root_urn()], - "resourceOwners": False, - "allUsers": False, - "allGroups": False, - }, - } - }, + create_policy_query = """mutation createPolicy($input: PolicyUpdateInput!) 
{ + createPolicy(input: $input) }""" + create_policy_variables: Dict[str, Any] = { + "input": { + "type": "METADATA", + "name": "Test Metadata Policy", + "description": "My Metadaata Policy", + "state": "ACTIVE", + "resources": {"type": "dataset", "allResources": True}, + "privileges": ["EDIT_ENTITY_TAGS"], + "actors": { + "users": [get_root_urn()], + "resourceOwners": False, + "allUsers": False, + "allGroups": False, + }, + } } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json + res_data = execute_graphql( + auth_session, create_policy_query, create_policy_variables ) - response.raise_for_status() - res_data = response.json() - assert res_data - assert res_data["data"] assert res_data["data"]["createPolicy"] new_urn = res_data["data"]["createPolicy"] - update_json = { - "query": """mutation updatePolicy($urn: String!, $input: PolicyUpdateInput!) {\n - updatePolicy(urn: $urn, input: $input) }""", - "variables": { - "urn": new_urn, - "input": { - "type": "METADATA", - "state": "ACTIVE", - "name": "Test Metadata Policy", - "description": "Updated Metadaata Policy", - "privileges": ["EDIT_ENTITY_TAGS", "EDIT_ENTITY_GLOSSARY_TERMS"], - "actors": { - "resourceOwners": False, - "allUsers": True, - "allGroups": False, - }, + update_policy_query = """mutation updatePolicy($urn: String!, $input: PolicyUpdateInput!) 
{ + updatePolicy(urn: $urn, input: $input) }""" + update_policy_variables: Dict[str, Any] = { + "urn": new_urn, + "input": { + "type": "METADATA", + "state": "ACTIVE", + "name": "Test Metadata Policy", + "description": "Updated Metadaata Policy", + "privileges": ["EDIT_ENTITY_TAGS", "EDIT_ENTITY_GLOSSARY_TERMS"], + "actors": { + "resourceOwners": False, + "allUsers": True, + "allGroups": False, }, }, } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=update_json + res_data = execute_graphql( + auth_session, update_policy_query, update_policy_variables ) - response.raise_for_status() - res_data = response.json() # Check updated was submitted successfully - assert res_data - assert res_data["data"] assert res_data["data"]["updatePolicy"] assert res_data["data"]["updatePolicy"] == new_urn _ensure_policy_present(auth_session, new_urn) # Now test that the policy can be deleted - json = { - "query": """mutation deletePolicy($urn: String!) {\n - deletePolicy(urn: $urn) }""", - "variables": {"urn": new_urn}, - } + delete_policy_query = """mutation deletePolicy($urn: String!) { + deletePolicy(urn: $urn) }""" + delete_policy_variables: Dict[str, Any] = {"urn": new_urn} - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json + res_data = execute_graphql( + auth_session, delete_policy_query, delete_policy_variables ) - response.raise_for_status() - res_data = response.json() res_data = listPolicies(auth_session) @@ -165,45 +151,39 @@ def test_frontend_policy_operations(auth_session): def listPolicies(auth_session): - json = { - "query": """query listPolicies($input: ListPoliciesInput!) 
{\n - listPolicies(input: $input) {\n - start\n - count\n - total\n - policies {\n - urn\n - type\n - name\n - description\n - state\n - resources {\n - type\n - allResources\n - resources\n - }\n - privileges\n - actors {\n - users\n - groups\n - allUsers\n - allGroups\n - resourceOwners\n - }\n - editable\n - }\n - }\n - }""", - "variables": { - "input": { - "start": 0, - "count": 20, + query = """query listPolicies($input: ListPoliciesInput!) { + listPolicies(input: $input) { + start + count + total + policies { + urn + type + name + description + state + resources { + type + allResources + resources + } + privileges + actors { + users + groups + allUsers + allGroups + resourceOwners + } + editable + } } - }, + }""" + variables: Dict[str, Any] = { + "input": { + "start": 0, + "count": 20, + } } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json - ) - response.raise_for_status() - return response.json() + return execute_graphql(auth_session, query, variables) diff --git a/smoke-test/tests/tags_and_terms/tags_and_terms_test.py b/smoke-test/tests/tags_and_terms/tags_and_terms_test.py index 49bfdbc4f939..8c1b5ab677ce 100644 --- a/smoke-test/tests/tags_and_terms/tags_and_terms_test.py +++ b/smoke-test/tests/tags_and_terms/tags_and_terms_test.py @@ -1,6 +1,8 @@ +from typing import Any, Dict + import pytest -from tests.utils import delete_urns_from_file, ingest_file_via_rest +from tests.utils import delete_urns_from_file, execute_graphql, ingest_file_via_rest @pytest.fixture(scope="module", autouse=True) @@ -18,67 +20,40 @@ def test_add_tag(auth_session): env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" - dataset_json = { - "query": """query getDataset($urn: String!) {\n - dataset(urn: $urn) {\n - globalTags {\n - tags {\n - tag {\n - urn\n - name\n - description\n - }\n - }\n - }\n - }\n - }""", - "variables": {"urn": dataset_urn}, - } + dataset_query = """query getDataset($urn: String!) 
{ + dataset(urn: $urn) { + globalTags { + tags { + tag { + urn + name + description + } + } + } + } + }""" + dataset_variables: Dict[str, Any] = {"urn": dataset_urn} # Fetch tags - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["dataset"] + res_data = execute_graphql(auth_session, dataset_query, dataset_variables) assert res_data["data"]["dataset"]["globalTags"] is None - add_json = { - "query": """mutation addTag($input: TagAssociationInput!) {\n + add_query = """mutation addTag($input: TagAssociationInput!) { addTag(input: $input) - }""", - "variables": { - "input": { - "tagUrn": "urn:li:tag:Legacy", - "resourceUrn": dataset_urn, - } - }, + }""" + add_variables: Dict[str, Any] = { + "input": { + "tagUrn": "urn:li:tag:Legacy", + "resourceUrn": dataset_urn, + } } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=add_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + res_data = execute_graphql(auth_session, add_query, add_variables) assert res_data["data"]["addTag"] is True # Refetch the dataset - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["dataset"] + res_data = execute_graphql(auth_session, dataset_query, dataset_variables) assert res_data["data"]["dataset"]["globalTags"] == { "tags": [ { @@ -91,107 +66,61 @@ def test_add_tag(auth_session): ] } - remove_json = { - "query": """mutation removeTag($input: TagAssociationInput!) {\n + remove_query = """mutation removeTag($input: TagAssociationInput!) 
{ removeTag(input: $input) - }""", - "variables": { - "input": { - "tagUrn": "urn:li:tag:Legacy", - "resourceUrn": dataset_urn, - } - }, + }""" + remove_variables: Dict[str, Any] = { + "input": { + "tagUrn": "urn:li:tag:Legacy", + "resourceUrn": dataset_urn, + } } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=remove_json - ) - response.raise_for_status() - res_data = response.json() - + res_data = execute_graphql(auth_session, remove_query, remove_variables) print(res_data) - - assert res_data - assert res_data["data"] assert res_data["data"]["removeTag"] is True # Refetch the dataset - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["dataset"] + res_data = execute_graphql(auth_session, dataset_query, dataset_variables) assert res_data["data"]["dataset"]["globalTags"] == {"tags": []} def test_add_tag_to_chart(auth_session): chart_urn = "urn:li:chart:(looker,test-tags-terms-sample-chart)" - chart_json = { - "query": """query getChart($urn: String!) {\n - chart(urn: $urn) {\n - globalTags {\n - tags {\n - tag {\n - urn\n - name\n - description\n - }\n - }\n - }\n - }\n - }""", - "variables": {"urn": chart_urn}, - } + chart_query = """query getChart($urn: String!) 
{ + chart(urn: $urn) { + globalTags { + tags { + tag { + urn + name + description + } + } + } + } + }""" + chart_variables: Dict[str, Any] = {"urn": chart_urn} # Fetch tags - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=chart_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["chart"] + res_data = execute_graphql(auth_session, chart_query, chart_variables) assert res_data["data"]["chart"]["globalTags"] is None - add_json = { - "query": """mutation addTag($input: TagAssociationInput!) {\n - addTag(input: $input) - }""", - "variables": { - "input": { - "tagUrn": "urn:li:tag:Legacy", - "resourceUrn": chart_urn, - } - }, + add_mutation = """mutation addTag($input: TagAssociationInput!) { + addTag(input: $input) + }""" + add_variables: Dict[str, Any] = { + "input": { + "tagUrn": "urn:li:tag:Legacy", + "resourceUrn": chart_urn, + } } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=add_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + res_data = execute_graphql(auth_session, add_mutation, add_variables) assert res_data["data"]["addTag"] is True - # Refetch the dataset - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=chart_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["chart"] + # Refetch the chart + res_data = execute_graphql(auth_session, chart_query, chart_variables) assert res_data["data"]["chart"]["globalTags"] == { "tags": [ { @@ -204,38 +133,20 @@ def test_add_tag_to_chart(auth_session): ] } - remove_json = { - "query": """mutation removeTag($input: TagAssociationInput!) 
{\n - removeTag(input: $input) - }""", - "variables": { - "input": { - "tagUrn": "urn:li:tag:Legacy", - "resourceUrn": chart_urn, - } - }, + remove_mutation = """mutation removeTag($input: TagAssociationInput!) { + removeTag(input: $input) + }""" + remove_variables: Dict[str, Any] = { + "input": { + "tagUrn": "urn:li:tag:Legacy", + "resourceUrn": chart_urn, + } } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=remove_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + res_data = execute_graphql(auth_session, remove_mutation, remove_variables) assert res_data["data"]["removeTag"] is True - # Refetch the dataset - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=chart_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["chart"] + # Refetch the chart + res_data = execute_graphql(auth_session, chart_query, chart_variables) assert res_data["data"]["chart"]["globalTags"] == {"tags": []} @@ -245,68 +156,39 @@ def test_add_term(auth_session): env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" - dataset_json = { - "query": """query getDataset($urn: String!) {\n - dataset(urn: $urn) {\n - glossaryTerms {\n - terms {\n - term {\n - urn\n - name\n - }\n - }\n - }\n - }\n - }""", - "variables": {"urn": dataset_urn}, - } + dataset_query = """query getDataset($urn: String!) 
{ + dataset(urn: $urn) { + glossaryTerms { + terms { + term { + urn + name + } + } + } + } + }""" + dataset_variables: Dict[str, Any] = {"urn": dataset_urn} # Fetch the terms - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["dataset"] + res_data = execute_graphql(auth_session, dataset_query, dataset_variables) assert res_data["data"]["dataset"]["glossaryTerms"] is None - add_json = { - "query": """mutation addTerm($input: TermAssociationInput!) {\n - addTerm(input: $input) - }""", - "variables": { - "input": { - "termUrn": "urn:li:glossaryTerm:SavingAccount", - "resourceUrn": dataset_urn, - } - }, + add_mutation = """mutation addTerm($input: TermAssociationInput!) { + addTerm(input: $input) + }""" + add_variables: Dict[str, Any] = { + "input": { + "termUrn": "urn:li:glossaryTerm:SavingAccount", + "resourceUrn": dataset_urn, + } } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=add_json - ) - response.raise_for_status() - res_data = response.json() - + res_data = execute_graphql(auth_session, add_mutation, add_variables) print(res_data) - - assert res_data - assert res_data["data"] assert res_data["data"]["addTerm"] is True # Refetch the dataset - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["dataset"] + res_data = execute_graphql(auth_session, dataset_query, dataset_variables) assert res_data["data"]["dataset"]["glossaryTerms"] == { "terms": [ { @@ -318,39 +200,21 @@ def test_add_term(auth_session): ] } - remove_json = { - "query": """mutation removeTerm($input: TermAssociationInput!) 
{\n - removeTerm(input: $input) - }""", - "variables": { - "input": { - "termUrn": "urn:li:glossaryTerm:SavingAccount", - "resourceUrn": dataset_urn, - } - }, + remove_mutation = """mutation removeTerm($input: TermAssociationInput!) { + removeTerm(input: $input) + }""" + remove_variables: Dict[str, Any] = { + "input": { + "termUrn": "urn:li:glossaryTerm:SavingAccount", + "resourceUrn": dataset_urn, + } } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=remove_json - ) - response.raise_for_status() - res_data = response.json() - + res_data = execute_graphql(auth_session, remove_mutation, remove_variables) print(res_data) - - assert res_data - assert res_data["data"] assert res_data["data"]["removeTerm"] is True + # Refetch the dataset - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["dataset"] + res_data = execute_graphql(auth_session, dataset_query, dataset_variables) assert res_data["data"]["dataset"]["glossaryTerms"] == {"terms": []} @@ -433,51 +297,27 @@ def test_update_schemafield(auth_session): } # dataset schema tags - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_schema_json_tags - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["dataset"] + query_tags: str = dataset_schema_json_tags["query"] # type: ignore + variables_tags: Dict[str, Any] = dataset_schema_json_tags["variables"] # type: ignore + res_data = execute_graphql(auth_session, query_tags, variables_tags) assert res_data["data"]["dataset"]["editableSchemaMetadata"] is None - add_json = { - "query": """mutation addTag($input: TagAssociationInput!) {\n + add_query = """mutation addTag($input: TagAssociationInput!) 
{\n addTag(input: $input) - }""", - "variables": { - "input": { - "tagUrn": "urn:li:tag:Legacy", - "resourceUrn": dataset_urn, - "subResource": "[version=2.0].[type=boolean].field_bar", - "subResourceType": "DATASET_FIELD", - } - }, + }""" + add_variables: Dict[str, Any] = { + "input": { + "tagUrn": "urn:li:tag:Legacy", + "resourceUrn": dataset_urn, + "subResource": "[version=2.0].[type=boolean].field_bar", + "subResourceType": "DATASET_FIELD", + } } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=add_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + res_data = execute_graphql(auth_session, add_query, add_variables) assert res_data["data"]["addTag"] is True # Refetch the dataset schema - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_schema_json_tags - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["dataset"] + res_data = execute_graphql(auth_session, query_tags, variables_tags) assert res_data["data"]["dataset"]["editableSchemaMetadata"] == { "editableSchemaFieldInfo": [ { @@ -496,80 +336,45 @@ def test_update_schemafield(auth_session): ] } - remove_json = { - "query": """mutation removeTag($input: TagAssociationInput!) {\n + remove_query = """mutation removeTag($input: TagAssociationInput!) 
{\n removeTag(input: $input) - }""", - "variables": { - "input": { - "tagUrn": "urn:li:tag:Legacy", - "resourceUrn": dataset_urn, - "subResource": "[version=2.0].[type=boolean].field_bar", - "subResourceType": "DATASET_FIELD", - } - }, + }""" + remove_variables: Dict[str, Any] = { + "input": { + "tagUrn": "urn:li:tag:Legacy", + "resourceUrn": dataset_urn, + "subResource": "[version=2.0].[type=boolean].field_bar", + "subResourceType": "DATASET_FIELD", + } } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=remove_json - ) - response.raise_for_status() - res_data = response.json() - + res_data = execute_graphql(auth_session, remove_query, remove_variables) print(res_data) - - assert res_data - assert res_data["data"] assert res_data["data"]["removeTag"] is True # Refetch the dataset - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_schema_json_tags - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["dataset"] + res_data = execute_graphql(auth_session, query_tags, variables_tags) assert res_data["data"]["dataset"]["editableSchemaMetadata"] == { "editableSchemaFieldInfo": [{"globalTags": {"tags": []}}] } - add_json = { - "query": """mutation addTerm($input: TermAssociationInput!) {\n + add_query = """mutation addTerm($input: TermAssociationInput!) 
{\n addTerm(input: $input) - }""", - "variables": { - "input": { - "termUrn": "urn:li:glossaryTerm:SavingAccount", - "resourceUrn": dataset_urn, - "subResource": "[version=2.0].[type=boolean].field_bar", - "subResourceType": "DATASET_FIELD", - } - }, + }""" + add_variables = { + "input": { + "termUrn": "urn:li:glossaryTerm:SavingAccount", + "resourceUrn": dataset_urn, + "subResource": "[version=2.0].[type=boolean].field_bar", + "subResourceType": "DATASET_FIELD", + } } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=add_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + res_data = execute_graphql(auth_session, add_query, add_variables) assert res_data["data"]["addTerm"] is True # Refetch the dataset schema - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_schema_json_terms - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["dataset"] + query_terms: str = dataset_schema_json_terms["query"] # type: ignore + variables_terms: Dict[str, Any] = dataset_schema_json_terms["variables"] # type: ignore + res_data = execute_graphql(auth_session, query_terms, variables_terms) assert res_data["data"]["dataset"]["editableSchemaMetadata"] == { "editableSchemaFieldInfo": [ { @@ -587,50 +392,28 @@ def test_update_schemafield(auth_session): ] } - remove_json = { - "query": """mutation removeTerm($input: TermAssociationInput!) {\n + remove_query = """mutation removeTerm($input: TermAssociationInput!) 
{\n removeTerm(input: $input) - }""", - "variables": { - "input": { - "termUrn": "urn:li:glossaryTerm:SavingAccount", - "resourceUrn": dataset_urn, - "subResource": "[version=2.0].[type=boolean].field_bar", - "subResourceType": "DATASET_FIELD", - } - }, + }""" + remove_variables = { + "input": { + "termUrn": "urn:li:glossaryTerm:SavingAccount", + "resourceUrn": dataset_urn, + "subResource": "[version=2.0].[type=boolean].field_bar", + "subResourceType": "DATASET_FIELD", + } } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=remove_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + res_data = execute_graphql(auth_session, remove_query, remove_variables) assert res_data["data"]["removeTerm"] is True # Refetch the dataset - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_schema_json_terms - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["dataset"] + res_data = execute_graphql(auth_session, query_terms, variables_terms) assert res_data["data"]["dataset"]["editableSchemaMetadata"] == { "editableSchemaFieldInfo": [{"glossaryTerms": {"terms": []}}] } # dataset schema tags - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_schema_json_tags - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, query_tags, variables_tags) update_description_json = { "query": """mutation updateDescription($input: DescriptionUpdateInput!) 
{\n @@ -647,41 +430,20 @@ def test_update_schemafield(auth_session): } # fetch no description - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", - json=dataset_schema_json_description, - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["dataset"] + query_description: str = dataset_schema_json_description["query"] # type: ignore + variables_description: Dict[str, Any] = dataset_schema_json_description["variables"] # type: ignore + res_data = execute_graphql(auth_session, query_description, variables_description) assert res_data["data"]["dataset"]["editableSchemaMetadata"] == { "editableSchemaFieldInfo": [{"description": None}] } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=update_description_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] + update_query: str = update_description_json["query"] # type: ignore + update_variables: Dict[str, Any] = update_description_json["variables"] # type: ignore + res_data = execute_graphql(auth_session, update_query, update_variables) assert res_data["data"]["updateDescription"] is True # Refetch the dataset - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", - json=dataset_schema_json_description, - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["dataset"] + res_data = execute_graphql(auth_session, query_description, variables_description) assert res_data["data"]["dataset"]["editableSchemaMetadata"] == { "editableSchemaFieldInfo": [{"description": "new description"}] } diff --git a/smoke-test/tests/tests/tests_test.py b/smoke-test/tests/tests/tests_test.py index 10d1afc36784..7841b73f98fa 100644 --- a/smoke-test/tests/tests/tests_test.py +++ b/smoke-test/tests/tests/tests_test.py @@ -1,5 +1,5 @@ 
import time -from typing import List +from typing import Any, Dict, List import pytest import tenacity @@ -7,6 +7,7 @@ from tests.utils import ( delete_urns, delete_urns_from_file, + execute_graphql, get_sleep_info, ingest_file_via_rest, ) @@ -37,31 +38,22 @@ def create_test(auth_session, test_id="test id"): TEST_URNS.extend([f"urn:li:test:{test_id}"]) # Create new Test - create_test_json = { - "query": """mutation createTest($input: CreateTestInput!) {\n + create_test_query = """mutation createTest($input: CreateTestInput!) { createTest(input: $input) - }""", - "variables": { - "input": { - "id": test_id, - "name": test_name, - "category": test_category, - "description": test_description, - "definition": {"json": "{}"}, - } - }, + }""" + create_test_variables: Dict[str, Any] = { + "input": { + "id": test_id, + "name": test_name, + "category": test_category, + "description": test_description, + "definition": {"json": "{}"}, + } } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=create_test_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, create_test_query, create_test_variables) - assert res_data - assert res_data["data"] assert res_data["data"]["createTest"] is not None - assert "errors" not in res_data return res_data["data"]["createTest"] @@ -71,36 +63,29 @@ def test_get_test_results(auth_session): urn = ( "urn:li:dataset:(urn:li:dataPlatform:kafka,test-tests-sample,PROD)" # Test urn ) - json = { - "query": """query getDataset($urn: String!) {\n - dataset(urn: $urn) {\n - urn\n - testResults {\n - failing {\n - test {\n - urn\n - }\n + query = """query getDataset($urn: String!) 
{ + dataset(urn: $urn) { + urn + testResults { + failing { + test { + urn + } type - }\n - passing {\n - test {\n - urn\n - }\n + } + passing { + test { + urn + } type - }\n - }\n - }\n - }""", - "variables": {"urn": urn}, - } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=json - ) - response.raise_for_status() - res_data = response.json() + } + } + } + }""" + variables: Dict[str, Any] = {"urn": urn} + + res_data = execute_graphql(auth_session, query, variables) - assert res_data - assert res_data["data"] assert res_data["data"]["dataset"] assert res_data["data"]["dataset"]["urn"] == urn assert res_data["data"]["dataset"]["testResults"] == { @@ -114,28 +99,21 @@ def test_create_test(auth_session): test_urn = create_test(auth_session) # Get the test - get_test_json = { - "query": """query test($urn: String!) {\n - test(urn: $urn) { \n - urn\n - name\n - category\n - description\n - definition {\n - json\n - }\n + get_test_query = """query test($urn: String!) 
{ + test(urn: $urn) { + urn + name + category + description + definition { + json + } } - }""", - "variables": {"urn": test_urn}, - } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=get_test_json - ) - response.raise_for_status() - res_data = response.json() + }""" + get_test_variables: Dict[str, Any] = {"urn": test_urn} + + res_data = execute_graphql(auth_session, get_test_query, get_test_variables) - assert res_data - assert res_data["data"] assert res_data["data"]["test"] == { "urn": test_urn, "name": test_name, @@ -145,17 +123,11 @@ def test_create_test(auth_session): "json": "{}", }, } - assert "errors" not in res_data # Ensure that soft-deleted tests - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=get_test_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, get_test_query, get_test_variables) assert res_data["data"]["test"] is not None - assert "errors" not in res_data @pytest.mark.dependency(depends=["test_create_test"]) @@ -166,55 +138,39 @@ def test_update_test(auth_session): test_description = "new description" # Update Test - update_test_json = { - "query": """mutation updateTest($urn: String!, $input: UpdateTestInput!) {\n + update_test_query = """mutation updateTest($urn: String!, $input: UpdateTestInput!) 
{ updateTest(urn: $urn, input: $input) - }""", - "variables": { - "urn": test_urn, - "input": { - "name": test_name, - "category": test_category, - "description": test_description, - "definition": {"json": "{}"}, - }, + }""" + update_test_variables: Dict[str, Any] = { + "urn": test_urn, + "input": { + "name": test_name, + "category": test_category, + "description": test_description, + "definition": {"json": "{}"}, }, } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=update_test_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, update_test_query, update_test_variables) - assert res_data - assert res_data["data"] assert res_data["data"]["updateTest"] is not None - assert "errors" not in res_data # Get the test - get_test_json = { - "query": """query test($urn: String!) {\n - test(urn: $urn) { \n - urn\n - name\n - category\n - description\n - definition {\n - json\n - }\n + get_test_query = """query test($urn: String!) { + test(urn: $urn) { + urn + name + category + description + definition { + json + } } - }""", - "variables": {"urn": test_urn}, - } - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=get_test_json - ) - response.raise_for_status() - res_data = response.json() + }""" + get_test_variables: Dict[str, Any] = {"urn": test_urn} + + res_data = execute_graphql(auth_session, get_test_query, get_test_variables) - assert res_data - assert res_data["data"] assert res_data["data"]["test"] == { "urn": test_urn, "name": test_name, @@ -224,38 +180,28 @@ def test_update_test(auth_session): "json": "{}", }, } - assert "errors" not in res_data @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) def test_list_tests_retries(auth_session): - list_tests_json = { - "query": """query listTests($input: ListTestsInput!) 
{\n - listTests(input: $input) {\n - start\n - count\n - total\n - tests {\n - urn\n - }\n - }\n - }""", - "variables": {"input": {"start": 0, "count": 20}}, - } + list_tests_query = """query listTests($input: ListTestsInput!) { + listTests(input: $input) { + start + count + total + tests { + urn + } + } + }""" + list_tests_variables: Dict[str, Any] = {"input": {"start": 0, "count": 20}} - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=list_tests_json - ) - response.raise_for_status() - res_data = response.json() + res_data = execute_graphql(auth_session, list_tests_query, list_tests_variables) - assert res_data - assert res_data["data"] assert res_data["data"]["listTests"]["total"] >= 2 assert len(res_data["data"]["listTests"]["tests"]) >= 2 - assert "errors" not in res_data @pytest.mark.dependency(depends=["test_update_test"]) diff --git a/smoke-test/tests/tokens/revokable_access_token_test.py b/smoke-test/tests/tokens/revokable_access_token_test.py index 30022345f15b..fa42b5eff45e 100644 --- a/smoke-test/tests/tokens/revokable_access_token_test.py +++ b/smoke-test/tests/tokens/revokable_access_token_test.py @@ -113,15 +113,24 @@ def access_token_setup(auth_session, auth_exclude_filter): res_data = listAccessTokens(admin_session, filters=[auth_exclude_filter]) assert res_data assert res_data["data"] + + if res_data["data"]["listAccessTokens"]["tokens"]: + for metadata in res_data["data"]["listAccessTokens"]["tokens"]: + revokeAccessToken(admin_session, metadata["id"]) + wait_for_writes_to_sync() + + # Verify clean state after cleanup + res_data = listAccessTokens(admin_session, filters=[auth_exclude_filter]) assert res_data["data"]["listAccessTokens"]["total"] == 0 assert not res_data["data"]["listAccessTokens"]["tokens"] yield - # Clean up + # Clean up after the test res_data = listAccessTokens(admin_session, filters=[auth_exclude_filter]) for metadata in res_data["data"]["listAccessTokens"]["tokens"]: 
revokeAccessToken(admin_session, metadata["id"]) + wait_for_writes_to_sync() def test_admin_can_create_list_and_revoke_tokens(auth_exclude_filter): diff --git a/smoke-test/tests/utils.py b/smoke-test/tests/utils.py index 2aaa515980fd..afaefff48452 100644 --- a/smoke-test/tests/utils.py +++ b/smoke-test/tests/utils.py @@ -140,6 +140,54 @@ def check_endpoint(auth_session, url): raise SystemExit(f"{url}: is Not reachable \nErr: {e}") +def delete_entity(auth_session, urn: str) -> None: + delete_json = {"urn": urn} + response = auth_session.post( + f"{auth_session.gms_url()}/entities?action=delete", json=delete_json + ) + + response.raise_for_status() + + +def execute_graphql( + auth_session, + query: str, + variables: Optional[Dict[str, Any]] = None, + expect_errors: bool = False, +) -> Dict[str, Any]: + """Execute a GraphQL query with standard error handling. + + Args: + auth_session: Authenticated session for making requests + query: GraphQL query string + variables: Optional dictionary of GraphQL variables + + Returns: + Response data dictionary + + Example: + >>> query = "query getDataset($urn: String!) 
{ dataset(urn: $urn) { name } }" + >>> variables = {"urn": "urn:li:dataset:(...)"} + >>> res_data = execute_graphql(auth_session, query, variables) + >>> dataset_name = res_data["data"]["dataset"]["name"] + """ + json_payload: Dict[str, Any] = {"query": query} + if variables: + json_payload["variables"] = variables + + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json_payload + ) + response.raise_for_status() + res_data = response.json() + + assert res_data, "GraphQL response is empty" + assert res_data.get("data") is not None, "GraphQL response.data is None" + assert "errors" not in res_data, f"GraphQL errors: {res_data.get('errors')}" + + return res_data + + def run_datahub_cmd( command: List[str], *, diff --git a/smoke-test/tests/views/views_test.py b/smoke-test/tests/views/views_test.py index 521b54ffa0ed..b6e243db67c8 100644 --- a/smoke-test/tests/views/views_test.py +++ b/smoke-test/tests/views/views_test.py @@ -1,7 +1,9 @@ +from typing import Any, Dict + import pytest import tenacity -from tests.utils import get_sleep_info +from tests.utils import execute_graphql, get_sleep_info sleep_sec, sleep_times = get_sleep_info() @@ -9,18 +11,15 @@ @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _ensure_more_views(auth_session, list_views_json, query_name, before_count): - # Get new count of Views - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=list_views_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] +def _ensure_more_views( + auth_session, + query: str, + variables: Dict[str, Any], + query_name: str, + before_count: int, +) -> int: + res_data = execute_graphql(auth_session, query, variables) assert res_data["data"][query_name]["total"] is not None - assert "errors" not in res_data # Assert that there are more views now. 
after_count = res_data["data"][query_name]["total"] @@ -32,20 +31,17 @@ def _ensure_more_views(auth_session, list_views_json, query_name, before_count): @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _ensure_less_views(auth_session, list_views_json, query_name, before_count): - # Get new count of Views - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=list_views_json - ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] +def _ensure_less_views( + auth_session, + query: str, + variables: Dict[str, Any], + query_name: str, + before_count: int, +) -> None: + res_data = execute_graphql(auth_session, query, variables) assert res_data["data"][query_name]["total"] is not None - assert "errors" not in res_data - # Assert that there are more views now. + # Assert that there are fewer views now. after_count = res_data["data"][query_name]["total"] print(f"after_count is {after_count}") assert after_count == before_count - 1 @@ -54,44 +50,36 @@ def _ensure_less_views(auth_session, list_views_json, query_name, before_count): @pytest.mark.dependency() def test_create_list_delete_global_view(auth_session): # Get count of existing views - list_global_views_json = { - "query": """query listGlobalViews($input: ListGlobalViewsInput!) {\n - listGlobalViews(input: $input) {\n - start\n - count\n - total\n - views {\n - urn\n - viewType\n - name\n - description\n - definition {\n - entityTypes\n - filter {\n - operator\n - filters {\n - field\n - values\n - condition\n - }\n - }\n - }\n - }\n - }\n - }""", - "variables": {"input": {"start": 0, "count": 20}}, - } + list_global_views_query = """query listGlobalViews($input: ListGlobalViewsInput!) 
{ + listGlobalViews(input: $input) { + start + count + total + views { + urn + viewType + name + description + definition { + entityTypes + filter { + operator + filters { + field + values + condition + } + } + } + } + } + }""" + list_global_views_variables: Dict[str, Any] = {"input": {"start": 0, "count": 20}} - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=list_global_views_json + res_data = execute_graphql( + auth_session, list_global_views_query, list_global_views_variables ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] assert res_data["data"]["listGlobalViews"]["total"] is not None - assert "errors" not in res_data before_count = res_data["data"]["listGlobalViews"]["total"] @@ -113,60 +101,45 @@ def test_create_list_delete_global_view(auth_session): } # Create new View - create_view_json = { - "query": """mutation createView($input: CreateViewInput!) {\n - createView(input: $input) {\n - urn\n - }\n - }""", - "variables": { - "input": { - "viewType": "GLOBAL", - "name": new_view_name, - "description": new_view_description, - "definition": new_view_definition, - } - }, + create_view_mutation = """mutation createView($input: CreateViewInput!) 
{ + createView(input: $input) { + urn + } + }""" + create_view_variables: Dict[str, Any] = { + "input": { + "viewType": "GLOBAL", + "name": new_view_name, + "description": new_view_description, + "definition": new_view_definition, + } } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=create_view_json + res_data = execute_graphql( + auth_session, create_view_mutation, create_view_variables ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] assert res_data["data"]["createView"] is not None - assert "errors" not in res_data view_urn = res_data["data"]["createView"]["urn"] new_count = _ensure_more_views( auth_session=auth_session, - list_views_json=list_global_views_json, + query=list_global_views_query, + variables=list_global_views_variables, query_name="listGlobalViews", before_count=before_count, ) # Delete the View - delete_view_json = { - "query": """mutation deleteView($urn: String!) {\n - deleteView(urn: $urn) - }""", - "variables": {"urn": view_urn}, - } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=delete_view_json - ) - response.raise_for_status() - res_data = response.json() - assert "errors" not in res_data + delete_view_mutation = """mutation deleteView($urn: String!) 
{ + deleteView(urn: $urn) + }""" + delete_view_variables: Dict[str, Any] = {"urn": view_urn} + execute_graphql(auth_session, delete_view_mutation, delete_view_variables) _ensure_less_views( auth_session=auth_session, - list_views_json=list_global_views_json, + query=list_global_views_query, + variables=list_global_views_variables, query_name="listGlobalViews", before_count=new_count, ) @@ -175,44 +148,36 @@ def test_create_list_delete_global_view(auth_session): @pytest.mark.dependency(depends=["test_create_list_delete_global_view"]) def test_create_list_delete_personal_view(auth_session): # Get count of existing views - list_my_views_json = { - "query": """query listMyViews($input: ListMyViewsInput!) {\n - listMyViews(input: $input) {\n - start\n - count\n - total\n - views {\n - urn\n - viewType\n - name\n - description\n - definition {\n - entityTypes\n - filter {\n - operator\n - filters {\n - field\n - values\n - condition\n - }\n - }\n - }\n - }\n - }\n - }""", - "variables": {"input": {"start": 0, "count": 20}}, - } + list_my_views_query = """query listMyViews($input: ListMyViewsInput!) 
{ + listMyViews(input: $input) { + start + count + total + views { + urn + viewType + name + description + definition { + entityTypes + filter { + operator + filters { + field + values + condition + } + } + } + } + } + }""" + list_my_views_variables: Dict[str, Any] = {"input": {"start": 0, "count": 20}} - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=list_my_views_json + res_data = execute_graphql( + auth_session, list_my_views_query, list_my_views_variables ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] assert res_data["data"]["listMyViews"]["total"] is not None - assert "errors" not in res_data before_count = res_data["data"]["listMyViews"]["total"] @@ -234,60 +199,45 @@ def test_create_list_delete_personal_view(auth_session): } # Create new View - create_view_json = { - "query": """mutation createView($input: CreateViewInput!) {\n - createView(input: $input) {\n - urn\n - }\n - }""", - "variables": { - "input": { - "viewType": "PERSONAL", - "name": new_view_name, - "description": new_view_description, - "definition": new_view_definition, - } - }, + create_view_mutation = """mutation createView($input: CreateViewInput!) 
{ + createView(input: $input) { + urn + } + }""" + create_view_variables: Dict[str, Any] = { + "input": { + "viewType": "PERSONAL", + "name": new_view_name, + "description": new_view_description, + "definition": new_view_definition, + } } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=create_view_json + res_data = execute_graphql( + auth_session, create_view_mutation, create_view_variables ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] assert res_data["data"]["createView"] is not None - assert "errors" not in res_data view_urn = res_data["data"]["createView"]["urn"] new_count = _ensure_more_views( auth_session=auth_session, - list_views_json=list_my_views_json, + query=list_my_views_query, + variables=list_my_views_variables, query_name="listMyViews", before_count=before_count, ) # Delete the View - delete_view_json = { - "query": """mutation deleteView($urn: String!) {\n - deleteView(urn: $urn) - }""", - "variables": {"urn": view_urn}, - } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=delete_view_json - ) - response.raise_for_status() - res_data = response.json() - assert "errors" not in res_data + delete_view_mutation = """mutation deleteView($urn: String!) { + deleteView(urn: $urn) + }""" + delete_view_variables: Dict[str, Any] = {"urn": view_urn} + execute_graphql(auth_session, delete_view_mutation, delete_view_variables) _ensure_less_views( auth_session=auth_session, - list_views_json=list_my_views_json, + query=list_my_views_query, + variables=list_my_views_variables, query_name="listMyViews", before_count=new_count, ) @@ -314,32 +264,23 @@ def test_update_global_view(auth_session): } # Create new View - create_view_json = { - "query": """mutation createView($input: CreateViewInput!) 
{\n - createView(input: $input) {\n - urn\n - }\n - }""", - "variables": { - "input": { - "viewType": "PERSONAL", - "name": new_view_name, - "description": new_view_description, - "definition": new_view_definition, - } - }, + create_view_mutation = """mutation createView($input: CreateViewInput!) { + createView(input: $input) { + urn + } + }""" + create_view_variables: Dict[str, Any] = { + "input": { + "viewType": "PERSONAL", + "name": new_view_name, + "description": new_view_description, + "definition": new_view_definition, + } } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=create_view_json + res_data = execute_graphql( + auth_session, create_view_mutation, create_view_variables ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] assert res_data["data"]["createView"] is not None - assert "errors" not in res_data view_urn = res_data["data"]["createView"]["urn"] @@ -360,43 +301,27 @@ def test_update_global_view(auth_session): }, } - update_view_json = { - "query": """mutation updateView($urn: String!, $input: UpdateViewInput!) {\n - updateView(urn: $urn, input: $input) {\n - urn\n - }\n - }""", - "variables": { - "urn": view_urn, - "input": { - "name": new_view_name, - "description": new_view_description, - "definition": new_view_definition, - }, + update_view_mutation = """mutation updateView($urn: String!, $input: UpdateViewInput!) 
{ + updateView(urn: $urn, input: $input) { + urn + } + }""" + update_view_variables: Dict[str, Any] = { + "urn": view_urn, + "input": { + "name": new_view_name, + "description": new_view_description, + "definition": new_view_definition, }, } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=update_view_json + res_data = execute_graphql( + auth_session, update_view_mutation, update_view_variables ) - response.raise_for_status() - res_data = response.json() - - assert res_data assert res_data["data"]["updateView"] is not None - assert "errors" not in res_data # Delete the View - delete_view_json = { - "query": """mutation deleteView($urn: String!) {\n - deleteView(urn: $urn) - }""", - "variables": {"urn": view_urn}, - } - - response = auth_session.post( - f"{auth_session.frontend_url()}/api/v2/graphql", json=delete_view_json - ) - response.raise_for_status() - res_data = response.json() - assert "errors" not in res_data + delete_view_mutation = """mutation deleteView($urn: String!) { + deleteView(urn: $urn) + }""" + delete_view_variables: Dict[str, Any] = {"urn": view_urn} + execute_graphql(auth_session, delete_view_mutation, delete_view_variables)