Mirror of https://github.com/datahub-project/datahub.git (synced 2025-08-14 20:26:53 +00:00)
fix(spark-lineage): Rename table.platform to table.hive_platform_alias (#6311)
This commit is contained in:
parent: 37ac31c06f
commit: ff3a9dd4b9
@@ -122,20 +122,20 @@ The Spark agent can be configured using Databricks Cluster [Spark configuration]

 ## Configuration Options

 | Field                                             | Required | Default | Description                                                             |
-|-------------------------------------------------|----------|---------|-------------------------------------------------------------------------|
+|--------------------------------------------------|----------|---------|---------------------------------------------------------------------------------------------------------------------------------------|
 | spark.jars.packages                               | ✅       |         | Set with latest/required version io.acryl:datahub-spark-lineage:0.8.23 |
 | spark.extraListeners                              | ✅       |         | datahub.spark.DatahubSparkListener                                      |
 | spark.datahub.rest.server                         | ✅       |         | DataHub server URL, e.g. <http://localhost:8080>                        |
 | spark.datahub.rest.token                          |          |         | Authentication token.                                                   |
 | spark.datahub.rest.disable_ssl_verification       |          | false   | Disable SSL certificate validation. Caution: Only use this if you know what you are doing! |
-| spark.datahub.metadata.pipeline.platformInstance|          |         | Pipeline level platform instance                                        |
-| spark.datahub.metadata.dataset.platformInstance|          |         | dataset level platform instance                                         |
+| spark.datahub.metadata.pipeline.platformInstance |          |         | Pipeline level platform instance                                        |
+| spark.datahub.metadata.dataset.platformInstance  |          |         | Dataset level platform instance                                         |
 | spark.datahub.metadata.dataset.env                |          | PROD    | [Supported values](https://datahubproject.io/docs/graphql/enums#fabrictype). In all other cases, will fall back to PROD |
-| spark.datahub.metadata.table.Platform             |          | hive    | Platform for tables                                                     |
-| spark.datahub.metadata.include_scheme             |          | true    | Include scheme (e.g. hdfs://, s3://) in dataset URN                     |
-| spark.datahub.coalesce_jobs                       |          | false   | Only one datajob(taask) will be emitted containing all input and output datasets for the spark application |
+| spark.datahub.metadata.table.hive_platform_alias  |          | hive    | By default, DataHub assigns Hive-like tables to the Hive platform. If you are using Glue as your Hive metastore, set this config flag to `glue` |
+| spark.datahub.metadata.include_scheme             |          | true    | Include the scheme from the path URI (e.g. hdfs://, s3://) in the dataset URN. We recommend setting this value to false; it defaults to true for backwards compatibility with previous versions |
+| spark.datahub.coalesce_jobs                       |          | false   | Only one datajob (task) will be emitted, containing all input and output datasets for the Spark application |
 | spark.datahub.parent.datajob_urn                  |          |         | The specified dataset will be set as an upstream dataset for the created datajob. Effective only when spark.datahub.coalesce_jobs is set to true |

 ## What to Expect: The Metadata Model
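For orientation, here is a minimal sketch of wiring the renamed option into a SparkSession. The app name, server URL, and the `glue` value are illustrative, not taken from this diff; in practice these keys are usually set via spark-submit --conf or the Databricks cluster Spark config rather than in application code.

```java
import org.apache.spark.sql.SparkSession;

public class LineageConfigExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("datahub-lineage-example")  // illustrative name
        .config("spark.jars.packages", "io.acryl:datahub-spark-lineage:0.8.23")
        .config("spark.extraListeners", "datahub.spark.DatahubSparkListener")
        .config("spark.datahub.rest.server", "http://localhost:8080")  // example URL
        // The key this PR renames: alias Hive-like tables to the glue
        // platform when Glue serves as the Hive metastore.
        .config("spark.datahub.metadata.table.hive_platform_alias", "glue")
        .getOrCreate();

    spark.stop();
  }
}
```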
@@ -58,7 +58,7 @@ public class DatasetExtractor {
       CreateDataSourceTableAsSelectCommand.class, CreateHiveTableAsSelectCommand.class, InsertIntoHiveTable.class);
   private static final String DATASET_ENV_KEY = "metadata.dataset.env";
   private static final String DATASET_PLATFORM_INSTANCE_KEY = "metadata.dataset.platformInstance";
-  private static final String TABLE_PLATFORM_KEY = "metadata.table.platform";
+  private static final String TABLE_HIVE_PLATFORM_ALIAS = "metadata.table.hive_platform_alias";
   private static final String INCLUDE_SCHEME_KEY = "metadata.include_scheme";
   // TODO InsertIntoHiveDirCommand, InsertIntoDataSourceDirCommand

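Note that the constant's value, metadata.table.hive_platform_alias, omits the spark.datahub. prefix shown in the docs table above; the listener presumably strips that prefix before building the Typesafe Config it hands to DatasetExtractor. A hypothetical sketch of that mapping (the class and helper name are mine, not from the repo):

```java
import java.util.Map;
import java.util.stream.Collectors;

public class PrefixStripSketch {
  // Hypothetical helper: drops the "spark.datahub." prefix so Spark conf keys
  // line up with the paths DatasetExtractor looks up, e.g.
  // spark.datahub.metadata.table.hive_platform_alias -> metadata.table.hive_platform_alias.
  static Map<String, String> stripDatahubPrefix(Map<String, String> sparkConf) {
    return sparkConf.entrySet().stream()
        .filter(e -> e.getKey().startsWith("spark.datahub."))
        .collect(Collectors.toMap(
            e -> e.getKey().substring("spark.datahub.".length()),
            Map.Entry::getValue));
  }

  public static void main(String[] args) {
    Map<String, String> conf = Map.of(
        "spark.datahub.metadata.table.hive_platform_alias", "glue",
        "spark.master", "local[*]");
    System.out.println(stripDatahubPrefix(conf));
    // prints {metadata.table.hive_platform_alias=glue}
  }
}
```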
@@ -122,7 +122,7 @@ public class DatasetExtractor {
       InsertIntoHadoopFsRelationCommand cmd = (InsertIntoHadoopFsRelationCommand) p;
       if (cmd.catalogTable().isDefined()) {
         return Optional.of(Collections.singletonList(new CatalogTableDataset(cmd.catalogTable().get(),
-            getCommonPlatformInstance(datahubConfig), getTablePlatform(datahubConfig),
+            getCommonPlatformInstance(datahubConfig), getTableHivePlatformAlias(datahubConfig),
             getCommonFabricType(datahubConfig))));
       }
       return Optional.of(Collections.singletonList(new HdfsPathDataset(cmd.outputPath(),
@@ -157,26 +157,26 @@ public class DatasetExtractor {
       CreateDataSourceTableAsSelectCommand cmd = (CreateDataSourceTableAsSelectCommand) p;
       // TODO what of cmd.mode()
       return Optional.of(Collections.singletonList(new CatalogTableDataset(cmd.table(),
-          getCommonPlatformInstance(datahubConfig), getTablePlatform(datahubConfig),
+          getCommonPlatformInstance(datahubConfig), getTableHivePlatformAlias(datahubConfig),
           getCommonFabricType(datahubConfig))));
     });
     PLAN_TO_DATASET.put(CreateHiveTableAsSelectCommand.class, (p, ctx, datahubConfig) -> {
       CreateHiveTableAsSelectCommand cmd = (CreateHiveTableAsSelectCommand) p;
       return Optional.of(Collections.singletonList(new CatalogTableDataset(cmd.tableDesc(),
-          getCommonPlatformInstance(datahubConfig), getTablePlatform(datahubConfig),
+          getCommonPlatformInstance(datahubConfig), getTableHivePlatformAlias(datahubConfig),
           getCommonFabricType(datahubConfig))));
     });
     PLAN_TO_DATASET.put(InsertIntoHiveTable.class, (p, ctx, datahubConfig) -> {
       InsertIntoHiveTable cmd = (InsertIntoHiveTable) p;
       return Optional.of(Collections.singletonList(new CatalogTableDataset(cmd.table(),
-          getCommonPlatformInstance(datahubConfig), getTablePlatform(datahubConfig),
+          getCommonPlatformInstance(datahubConfig), getTableHivePlatformAlias(datahubConfig),
           getCommonFabricType(datahubConfig))));
     });

     PLAN_TO_DATASET.put(HiveTableRelation.class, (p, ctx, datahubConfig) -> {
       HiveTableRelation cmd = (HiveTableRelation) p;
       return Optional.of(Collections.singletonList(new CatalogTableDataset(cmd.tableMeta(),
-          getCommonPlatformInstance(datahubConfig), getTablePlatform(datahubConfig),
+          getCommonPlatformInstance(datahubConfig), getTableHivePlatformAlias(datahubConfig),
           getCommonFabricType(datahubConfig))));
     });

@@ -268,8 +268,8 @@ public class DatasetExtractor {
         : null;
   }

-  private static String getTablePlatform(Config datahubConfig) {
-    return datahubConfig.hasPath(TABLE_PLATFORM_KEY) ? datahubConfig.getString(TABLE_PLATFORM_KEY)
+  private static String getTableHivePlatformAlias(Config datahubConfig) {
+    return datahubConfig.hasPath(TABLE_HIVE_PLATFORM_ALIAS) ? datahubConfig.getString(TABLE_HIVE_PLATFORM_ALIAS)
         : "hive";
   }
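The renamed helper's fallback is easy to verify in isolation. Below is a self-contained sketch against the Typesafe Config API; the demo class and main method are mine, while the helper body mirrors the diff above.

```java
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import java.util.Collections;

public class HivePlatformAliasDemo {
  private static final String TABLE_HIVE_PLATFORM_ALIAS = "metadata.table.hive_platform_alias";

  // Same logic as the renamed method: use the configured alias if present,
  // otherwise default to "hive".
  static String getTableHivePlatformAlias(Config datahubConfig) {
    return datahubConfig.hasPath(TABLE_HIVE_PLATFORM_ALIAS)
        ? datahubConfig.getString(TABLE_HIVE_PLATFORM_ALIAS)
        : "hive";
  }

  public static void main(String[] args) {
    Config unset = ConfigFactory.empty();
    Config glue = ConfigFactory.parseMap(
        Collections.singletonMap(TABLE_HIVE_PLATFORM_ALIAS, "glue"));
    System.out.println(getTableHivePlatformAlias(unset)); // hive
    System.out.println(getTableHivePlatformAlias(glue));  // glue
  }
}
```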