import { BaseEntity, IBaseEntityStatics, statics } from '@datahub/data-models/entity/base-entity';
import { saveDatasetComplianceSuggestionFeedbackByUrn } from '@datahub/data-models/api/dataset/compliance';
import { readDatasetSchema } from '@datahub/data-models/api/dataset/schema';
import DatasetSchema from '@datahub/data-models/entity/dataset/modules/schema';
import { set, setProperties, computed } from '@ember/object';
import { IEntityRenderProps } from '@datahub/data-models/types/entity/rendering/entity-render-props';
import { DatasetExportPolicy } from '@datahub/data-models/entity/dataset/modules/export-policy';
import { DatasetPurgePolicy } from '@datahub/data-models/entity/dataset/modules/purge-policy';
import { DatasetPlatform } from '@datahub/metadata-types/constants/entity/dataset/platform';
import { decodeUrn } from '@datahub/utils/validators/urn';
import { IDataPlatform } from '@datahub/metadata-types/types/entity/dataset/platform';
import { DatasetLineage } from '@datahub/data-models/entity/dataset/modules/lineage';
import { readUpstreamDatasets } from '@datahub/data-models/api/dataset/lineage';
import { DatasetLineageList } from '@datahub/metadata-types/types/entity/dataset/lineage';
import { getRenderProps } from '@datahub/data-models/entity/dataset/render-props';
import { NotImplementedError } from '@datahub/data-models/constants/entity/shared';
import { every } from 'lodash-es';
import DatasetComplianceSuggestion from '@datahub/data-models/entity/dataset/modules/compliance-suggestion';
import { SuggestionIntent } from '@datahub/data-models/constants/entity/dataset/compliance-suggestions';
import { FabricType } from '@datahub/metadata-types/constants/common/fabric-type';
import { getDatasetUrnParts } from '@datahub/data-models/entity/dataset/utils/urn';
import { readDataPlatforms } from '@datahub/data-models/api/dataset/platforms';
import { getDefaultIfNotFoundError } from '@datahub/utils/api/error';
import { Snapshot } from '@datahub/metadata-types/types/metadata/snapshot';
import { oneWay, alias, reads } from '@ember/object/computed';
import { IDatasetEntity } from '@datahub/metadata-types/types/entity/dataset/dataset-entity';
import { readDatasetOwnersByUrn } from '@datahub/data-models/api/dataset/ownership';
import { transformOwnersResponseIntoOwners } from '@datahub/data-models/entity/dataset/utils/owner';
import { readInstitutionalMemory, writeInstitutionalMemory } from '@datahub/data-models/entity/institutional-memory';
import { relationship } from '@datahub/data-models/relationships/decorator';
import { PersonEntity } from '@datahub/data-models/entity/person/person-entity';
import titleize from '@nacho-ui/core/utils/strings/titleize';

/**
 * Defines the data model for the Dataset entity.
 */
@statics<IBaseEntityStatics<Com.Linkedin.Dataset.Dataset>>()
export class DatasetEntity extends BaseEntity<Com.Linkedin.Dataset.Dataset> {
  /**
   * The human friendly alias for Dataset entities
   */
  static displayName: 'datasets' = 'datasets';

  // TODO: [META-9120] This logic should not exist on the UI and is a temporary fix that will hopefully one day
  // get changed and read from API level instead. Also it won't matter once we fully migrate to new compliance
  /**
   * Calculates whether or not to show this dataset compliance as inherited from parents based on the nature
   * of the upstream relationships
   * @param upstreams - representation of upstream lineage information. Typing can change if used in data-portal
   * @returns false when there are no upstreams or when every upstream is of type 'TRANSFORMED', true otherwise
   */
  static hasInheritedCompliance(upstreams: Array<DatasetLineage> = []): boolean {
    if (!upstreams.length) {
      return false;
    }

    // Rule 1: If all upstreams are transformed, then we don't want to show this as inherited compliance
    const upstreamsAreAllTransformed: boolean = every(
      upstreams,
      (upstream): boolean => upstream.type === 'TRANSFORMED'
    );

    return !upstreamsAreAllTransformed;
  }

  /**
   * There is no snapshot for datasets, returning undefined but
   * implementing since the framework may call it
   */
  get readSnapshot(): Promise<Snapshot> | Promise<undefined> {
    return Promise.resolve(undefined);
  }

  /**
   * Additional hook to fetch the platforms needed for some operation in datasets.
   * Reads the list of data platforms and the dataset owners, then sets `owners` and
   * `currentDataPlatform` on this instance in a single `setProperties` call.
   */
  async onAfterCreate(): Promise<void> {
    const dataPlatforms: Array<IDataPlatform> = await readDataPlatforms();
    // Undefined when no known platform name matches this dataset's platform
    const currentDataPlatform = dataPlatforms.find((platform): boolean => platform.name === this.platform);
    // Reads dataset owner information from the resolved ownership endpoint
    const ownersResponse = await readDatasetOwnersByUrn(this.urn);
    // Transforms the owners response into a more generic owners response and sets it on the entity
    const owners = transformOwnersResponseIntoOwners(ownersResponse);

    setProperties(this, { owners, currentDataPlatform });
  }

  /**
   * Instance level accessor for the static display name
   */
  get displayName(): 'datasets' {
    return DatasetEntity.displayName;
  }

  /**
   * Discriminant when included in a tagged Entity union
   */
  static kind = 'DatasetEntity';

  /**
   * Instance level accessor for the static discriminant
   */
  get kind(): string {
    return DatasetEntity.kind;
  }

  /**
   * Shows the urn as an unencoded value
   */
  get rawUrn(): string {
    return decodeUrn(this.urn);
  }

  /**
   * Returns the fabric/environment for this particular dataset entity
   * @throws always, this accessor is not implemented for datasets
   */
  get fabric(): FabricType | undefined {
    throw new Error(NotImplementedError);
  }

  /**
   * Reads the snapshots for a list of Dataset urns
   * @static
   * @throws always, this operation is not implemented for datasets
   */
  static readSnapshots(_urns: Array<string>): Promise<Array<Snapshot>> {
    throw new Error(NotImplementedError);
  }

  /**
   * Holds the classified data retrieved from the API layer for the schema information as a DatasetSchema
   * class
   * @type {DatasetSchema}
   */
  schema?: DatasetSchema;

  /**
   * Holds the classified data retrieved from the API layer for the export policy as a DatasetExportPolicy
   * class
   * @type {DatasetExportPolicy}
   */
  exportPolicy?: DatasetExportPolicy;

  /**
   * Holds the classified data retrieved from the API layer for the purge policy as a DatasetPurgePolicy
   * class
   * @type {DatasetPurgePolicy}
   */
  purgePolicy?: DatasetPurgePolicy;

  /**
   * Holds the classified data retrieved from the API layer for the upstream datasets list as an array of
   * DatasetLineage classes
   * @type {Array<DatasetLineage>}
   */
  upstreams?: Array<DatasetLineage>;

  /**
   * Allows us to, instead of relying on the actual lineage objects for fetching the upstream datasets, create a
   * connection to the raw information within the instantiated lineage piece to form a relationship to other dataset
   * entities
   * @note WARNING - do not use this property directly. Because of the difference between internal and open source
   * models, this can create unintentional issues if relying on the underlying types directly
   */
  get _upstreamsRawDatasets(): Array<DatasetLineage['_rawDatasetData']> {
    const { upstreams = [] } = this;
    return upstreams.map((upstream): DatasetLineage['_rawDatasetData'] => upstream._rawDatasetData);
  }

  /**
   * Relates to the upstream dataset entities
   */
  @relationship('datasets', '_upstreamsRawDatasets')
  upstreamDatasets!: Array<DatasetEntity>;

  /**
   * Whether or not the dataset has been deprecated
   */
  @alias('entity.deprecated')
  deprecated?: boolean;

  /**
   * Note attached to the deprecation process for this dataset
   */
  @alias('entity.deprecationNote')
  deprecationNote?: string;

  /**
   * Timestamp for when the dataset was deprecated
   */
  @alias('entity.decommissionTime')
  decommissionTime?: number;

  /**
   * Last timestamp for the modification of this dataset
   */
  @oneWay('entity.modifiedTime')
  modifiedTime?: number;

  /**
   * Whether or not the entity has been removed.
   * Note: This overrides the BaseEntity implementation since DatasetEntity has a different behavior than
   * other entities
   */
  @oneWay('entity.removed')
  removed!: boolean;

  /**
   * Description for the dataset that contains more information about the nature of the data or metadata
   */
  @oneWay('entity.description')
  description?: string;

  /**
   * References the value of the healthScore from the underlying IDatasetEntity object
   * used in search results where the Health Score is shown as an attribute
   */
  @reads('entity.health.score')
  healthScore?: number;

  /**
   * For open source support only, creates a reference to the customProperties, which can be found as an aspect of the
   * dataset entity. This helps to display various properties that may be specific to certain datasets and allows for
   * flexible display
   */
  @oneWay('entity.customProperties')
  customProperties?: Com.Linkedin.Dataset.DatasetProperties['customProperties'];

  /**
   * Computes the custom dataset properties as a list of label and values rather than object mapping.
   * Yields undefined when there are no custom properties on the entity.
   */
  @computed('customProperties')
  get customDatasetProperties(): Array<{ label: string; value: string }> | void {
    const { customProperties } = this;

    if (customProperties) {
      return Object.keys(customProperties).map((key): { label: string; value: string } => ({
        label: key,
        value: customProperties[key]
      }));
    }
  }

  /**
   * Platform native type, for example it can be TABLE or VIEW for Hive.
   */
  @reads('entity.platformNativeType')
  platformNativeType?: Com.Linkedin.Dataset.Dataset['platformNativeType'];

  /**
   * Combined platform and native type, for example: Hive Table.
   * Only the parts that are available are included, so a missing native type yields just the platform
   * name instead of interpolating the text "undefined". An empty string is returned when neither part
   * is available.
   */
  @computed('platformNativeType', 'platform')
  get platformAndNativeType(): string {
    const labelParts = [this.platform, this.platformNativeType?.toLocaleLowerCase()].filter(Boolean);

    return labelParts.length ? titleize(labelParts.join(' ')) : '';
  }

  /**
   * gets the dataorigin field (needed from search)
   * from the urn
   */
  @computed('urn')
  get dataorigin(): FabricType | undefined {
    return getDatasetUrnParts(this.urn).fabric;
  }

  /**
   * Reference to the data entity, is the data platform to which the dataset belongs
   */
  @computed('entity.platform', 'urn')
  get platform(): DatasetPlatform | undefined {
    const { urn } = this;
    const parts = getDatasetUrnParts(urn);
    // TODO META-12937: Due to inconsistencies we can't rely on platform field
    // const platform = (entity && entity.platform) || parts.platform;
    const platform = parts.platform;
    // New API return platform with urn:li:dataPlatform:, in order to
    // make it compatible, we manually remove that part.
    const platformName = platform?.replace('urn:li:dataPlatform:', '');

    return platformName as DatasetPlatform | undefined;
  }

  /**
   * Reference to the data entity's native name, should not be something that is editable but gives us a
   * more human readable form for the dataset vs the urn.
   * Falls back to the prefix parsed from the urn, and finally to the urn itself, when the entity has no name.
   */
  get name(): string {
    return this.entity?.name || getDatasetUrnParts(this.urn).prefix || this.urn;
  }

  /**
   * Reference to the constructed data platform once it is fetched from api
   */
  currentDataPlatform?: IDataPlatform;

  /**
   * Class properties common across instances
   * Dictates how visual ui components should be rendered
   * Implemented as a getter to ensure that reads are idempotent
   * @readonly
   * @static
   * @type {IEntityRenderProps}
   */
  static get renderProps(): IEntityRenderProps {
    return getRenderProps();
  }

  /**
   * Is a promise that retrieves the compliance information (if it doesn't exist yet) and returns whether or not
   * the dataset contains personally identifiable information.
   * This implementation always resolves with false.
   */
  readPiiStatus(): Promise<boolean> {
    return Promise.resolve().then(() => false);
  }

  /**
   * Is a promise that retrieves the schema information and returns and sets the schema information in a
   * thenable format
   */
  async readSchema(): Promise<DatasetSchema> {
    const schema = await readDatasetSchema(this.urn);
    const datasetSchema = new DatasetSchema(schema);

    set(this, 'schema', datasetSchema);
    return datasetSchema;
  }

  /**
   * Asynchronously retrieves the upstream datasets for this dataset and assigns to the upstreams property
   */
  async readUpstreams(): Promise<Array<DatasetLineage>> {
    const lineageList = await readUpstreamDatasets(this.urn).catch(
      // Handling for an expected possibility of receiving a 404 error for this upstream dataset response, which
      // would require us to replace error with just an empty list
      getDefaultIfNotFoundError([] as DatasetLineageList)
    );
    const upstreams = lineageList.map((lineage): DatasetLineage => new DatasetLineage(lineage));

    set(this, 'upstreams', upstreams);
    return upstreams;
  }

  /**
   * Processes a save request to save the suggestion feedback given by the user, or inferred from the user action.
   * Since this is a background task, we do not await for a response from the server
   * @param suggestion - suggestion for which we are saving the feedback
   * @param feedback - whether the user has accepted or rejected the suggestion as accurate
   */
  saveSuggestionFeedback(suggestion: DatasetComplianceSuggestion, feedback: SuggestionIntent): void {
    const { uid } = suggestion;
    // NOTE(review): the result of this call is intentionally dropped; a failed save is not surfaced here
    saveDatasetComplianceSuggestionFeedbackByUrn(this.urn, uid, feedback);
  }

  /**
   * TODO META-11674
   * Interim method until IDatasetEntity is removed
   */
  get legacyDataset(): IDatasetEntity | void {
    if (this.entity) {
      return (this.entity as unknown) as IDatasetEntity;
    }
  }

  // TODO META-12149 this should be part of an Aspect. This fns can't live under BaseEntity as
  // then we would have a circular dependency:
  // BaseEntity -> InstitutionalMemory -> PersonEntity -> BaseEntity
  readInstitutionalMemory = readInstitutionalMemory;
  writeInstitutionalMemory = writeInstitutionalMemory;

  /**
   * @param urn - urn identifying the dataset
   * @param entityData - optional, already available entity data to seed the instance with
   */
  constructor(readonly urn: string, entityData?: Com.Linkedin.Dataset.Dataset) {
    super(urn);
    // Sometimes we do not need readEntity to get this information as it was already provided by another entity
    // and we can just instantiate the class with it
    if (entityData) {
      set(this, 'entity', entityData);
    }
  }

  /**
   * Relates to the person entities resolved from the `ownerUrns` key
   */
  @relationship('people', 'ownerUrns')
  datasetOwners?: Array<PersonEntity>;
}