Ignacio Bona 96af9c7ed6 OS Fixes: Hand tailor types, merge dataset-entity, removed unused types
{COMMIT-SYNC:8532a92fef64760adebfc54fa2ad4c3acf361b41}
2020-11-17 12:38:21 -08:00

372 lines
14 KiB
TypeScript

import { BaseEntity, IBaseEntityStatics, statics } from '@datahub/data-models/entity/base-entity';
import { saveDatasetComplianceSuggestionFeedbackByUrn } from '@datahub/data-models/api/dataset/compliance';
import { readDatasetSchema } from '@datahub/data-models/api/dataset/schema';
import DatasetSchema from '@datahub/data-models/entity/dataset/modules/schema';
import { set, setProperties, computed } from '@ember/object';
import { IEntityRenderProps } from '@datahub/data-models/types/entity/rendering/entity-render-props';
import { DatasetExportPolicy } from '@datahub/data-models/entity/dataset/modules/export-policy';
import { DatasetPurgePolicy } from '@datahub/data-models/entity/dataset/modules/purge-policy';
import { DatasetPlatform } from '@datahub/metadata-types/constants/entity/dataset/platform';
import { decodeUrn } from '@datahub/utils/validators/urn';
import { IDataPlatform } from '@datahub/metadata-types/types/entity/dataset/platform';
import { DatasetLineage } from '@datahub/data-models/entity/dataset/modules/lineage';
import { readUpstreamDatasets } from '@datahub/data-models/api/dataset/lineage';
import { DatasetLineageList } from '@datahub/metadata-types/types/entity/dataset/lineage';
import { getRenderProps } from '@datahub/data-models/entity/dataset/render-props';
import { NotImplementedError } from '@datahub/data-models/constants/entity/shared';
import { every } from 'lodash-es';
import DatasetComplianceSuggestion from '@datahub/data-models/entity/dataset/modules/compliance-suggestion';
import { SuggestionIntent } from '@datahub/data-models/constants/entity/dataset/compliance-suggestions';
import { FabricType } from '@datahub/metadata-types/constants/common/fabric-type';
import { getDatasetUrnParts } from '@datahub/data-models/entity/dataset/utils/urn';
import { readDataPlatforms } from '@datahub/data-models/api/dataset/platforms';
import { getDefaultIfNotFoundError } from '@datahub/utils/api/error';
import { Snapshot } from '@datahub/metadata-types/types/metadata/snapshot';
import { oneWay, alias, reads } from '@ember/object/computed';
import { IDatasetEntity } from '@datahub/metadata-types/types/entity/dataset/dataset-entity';
import { readDatasetOwnersByUrn } from '@datahub/data-models/api/dataset/ownership';
import { transformOwnersResponseIntoOwners } from '@datahub/data-models/entity/dataset/utils/owner';
import { readInstitutionalMemory, writeInstitutionalMemory } from '@datahub/data-models/entity/institutional-memory';
import { relationship } from '@datahub/data-models/relationships/decorator';
import { PersonEntity } from '@datahub/data-models/entity/person/person-entity';
import titleize from '@nacho-ui/core/utils/strings/titleize';
/**
 * Defines the data model for the Dataset entity.
 */
@statics<IBaseEntityStatics<Com.Linkedin.Dataset.Dataset>>()
export class DatasetEntity extends BaseEntity<Com.Linkedin.Dataset.Dataset> {
  /**
   * The human friendly alias for Dataset entities
   */
  static displayName: 'datasets' = 'datasets';

  // TODO: [META-9120] This logic should not exist on the UI and is a temporary fix that will hopefully one day
  // get changed and read from API level instead. Also it won't matter once we fully migrate to new compliance
  /**
   * Calculates whether or not to show this dataset compliance as inherited from parents based on the nature
   * of the upstream relationships
   * @param upstreams - representation of upstream lineage information. Typing can change if used in data-portal
   * @returns false when there are no upstreams or every upstream is of type 'TRANSFORMED', true otherwise
   */
  static hasInheritedCompliance<T extends { type: string }>(upstreams: Array<T> = []): boolean {
    if (!upstreams.length) {
      return false;
    }

    // Rule 1: If all upstreams are transformed, then we don't want to show this as inherited compliance
    const upstreamsAreAllTransformed: boolean = every(
      upstreams,
      (upstream): boolean => upstream.type === 'TRANSFORMED'
    );

    return !upstreamsAreAllTransformed;
  }

  /**
   * There is no snapshot for datasets, returning undefined but
   * implementing since, framework may call it
   */
  get readSnapshot(): Promise<undefined> | Promise<Snapshot> {
    return Promise.resolve(undefined);
  }

  /**
   * Additional hook to fetch the platforms and owners needed for some operation in datasets.
   * The two requests do not depend on each other, so they are issued together and both results are
   * set on the entity once they have resolved
   */
  async onAfterCreate(): Promise<void> {
    const [dataPlatforms, ownersResponse] = await Promise.all([
      readDataPlatforms(),
      // Reads dataset owner information from the resolved ownership endpoint
      readDatasetOwnersByUrn(this.urn)
    ]);
    const currentDataPlatform = dataPlatforms.find((platform): boolean => platform.name === this.platform);
    // Transforms the owners response into a more generic owners response and sets it on the entity
    const owners = transformOwnersResponseIntoOwners(ownersResponse);

    setProperties(this, { owners, currentDataPlatform });
  }

  /**
   * Instance level accessor for the static displayName
   */
  get displayName(): 'datasets' {
    return DatasetEntity.displayName;
  }

  /**
   * Discriminant when included in a tagged Entity union
   */
  static kind = 'DatasetEntity';

  /**
   * Instance level accessor for the static kind discriminant
   */
  get kind(): string {
    return DatasetEntity.kind;
  }

  /**
   * Shows the urn as an unencoded value
   */
  get rawUrn(): string {
    return decodeUrn(this.urn);
  }

  /**
   * Fabric/environment for this particular dataset entity.
   * Not implemented on this class: reading it always throws. See `dataorigin` for the fabric parsed from the urn
   * @throws {Error} always, with the NotImplementedError message
   */
  get fabric(): FabricType | undefined {
    throw new Error(NotImplementedError);
  }

  /**
   * Reads the snapshots for a list of Dataset urns.
   * Not implemented on this class: calling it always throws
   * @static
   * @throws {Error} always, with the NotImplementedError message
   */
  static readSnapshots(_urns: Array<string>): Promise<Array<Com.Linkedin.Metadata.Snapshot.DatasetSnapshot>> {
    throw new Error(NotImplementedError);
  }

  /**
   * Holds the classified data retrieved from the API layer for the schema information as a DatasetSchema
   * class
   * @type {DatasetSchema}
   */
  schema?: DatasetSchema;

  /**
   * Holds the classified data retrieved from the API layer for the export policy as a DatasetExportPolicy
   * class
   * @type {DatasetExportPolicy}
   */
  exportPolicy?: DatasetExportPolicy;

  /**
   * Holds the classified data retrieved from the API layer for the purge policy as a DatasetPurgePolicy
   * class
   * @type {DatasetPurgePolicy}
   */
  purgePolicy?: DatasetPurgePolicy;

  /**
   * Holds the classified data retrieved from the API layer for the upstream datasets list as an array of
   * DatasetLineage classes
   * @type {Array<DatasetLineage>}
   */
  upstreams?: Array<DatasetLineage>;

  /**
   * Allows us to, instead of relying on the actual lineage objects for fetching the upstream datasets, create a
   * connection to the raw information within the instantiated lineage piece to form a relationship to other dataset
   * entities
   * @note WARNING - do not use this property directly. Because of the difference between internal and open source
   * models, this can create unintentional issues if relying on the underlying types directly
   */
  get _upstreamsRawDatasets(): Array<DatasetLineage['_rawDatasetData']> {
    const { upstreams = [] } = this;
    return upstreams.map((upstream): DatasetLineage['_rawDatasetData'] => upstream._rawDatasetData);
  }

  /**
   * Relates to the upstream dataset entities
   */
  @relationship('datasets', '_upstreamsRawDatasets')
  upstreamDatasets!: Array<DatasetEntity>;

  /**
   * Whether or not the dataset has been deprecated
   */
  @alias('entity.deprecated')
  deprecated?: boolean;

  /**
   * Note attached to the deprecation process for this dataset
   */
  @alias('entity.deprecationNote')
  deprecationNote?: string;

  /**
   * Timestamp for when the dataset was deprecated
   */
  @alias('entity.decommissionTime')
  decommissionTime?: number;

  /**
   * Last timestamp for the modification of this dataset
   */
  @oneWay('entity.modifiedTime')
  modifiedTime?: number;

  /**
   * Whether or not the entity has been removed.
   * Note: This overrides the BaseEntity implementation since DatasetEntity has a different behavior than
   * other entities
   */
  @oneWay('entity.removed')
  removed!: boolean;

  /**
   * Description for the dataset that contains more information about the nature of the data or metadata
   */
  @oneWay('entity.description')
  description?: string;

  /**
   * References the value of the healthScore from the underlying IDatasetEntity object
   * used in search results where the Health Score is shown as an attribute
   */
  @reads('entity.health.score')
  healthScore?: number;

  /**
   * For open source support only, creates a reference to the customProperties, which can be found as an aspect of the
   * dataset entity. This helps to display various properties that may be specific to certain datasets and allows for
   * flexible display
   */
  @oneWay('entity.customProperties')
  customProperties?: Com.Linkedin.Dataset.DatasetProperties['customProperties'];

  /**
   * Computes the custom dataset properties as a list of label and values rather than object mapping.
   * Yields nothing (undefined) when there are no customProperties on the entity
   */
  @computed('customProperties')
  get customDatasetProperties(): Array<{ label: string; value: string }> | void {
    const { customProperties } = this;

    if (customProperties) {
      return Object.keys(customProperties).map((key): { label: string; value: string } => ({
        label: key,
        value: customProperties[key]
      }));
    }
  }

  /**
   * Platform native type, for example it can be TABLE or VIEW for Hive.
   */
  @reads('entity.platformNativeType')
  platformNativeType?: Com.Linkedin.Dataset.Dataset['platformNativeType'];

  /**
   * Combined platform and native type, for example: Hive Table.
   * Either part may be missing, in which case only the available part(s) are used so that the literal text
   * "undefined" never leaks into the label
   */
  @computed('platformNativeType', 'platform')
  get platformAndNativeType(): string {
    const { platform, platformNativeType } = this;
    const nativeType = platformNativeType ? platformNativeType.toLocaleLowerCase() : '';
    // Drops the empty / undefined parts before joining to avoid labels such as "Hive Undefined"
    const label = [platform, nativeType].filter(Boolean).join(' ');

    return titleize(label);
  }

  /**
   * gets the dataorigin field (needed from search)
   * from the urn
   */
  @computed('urn')
  get dataorigin(): FabricType | undefined {
    return getDatasetUrnParts(this.urn).fabric;
  }

  /**
   * Reference to the data entity, is the data platform to which the dataset belongs.
   * The value is currently derived from the urn only, see the TODO below
   */
  @computed('entity.platform', 'urn')
  get platform(): DatasetPlatform | undefined {
    const { urn } = this;
    const parts = getDatasetUrnParts(urn);
    // TODO META-12937: Due to inconsistencies we can't rely on platform field
    // const platform = (entity && entity.platform) || parts.platform;
    const platform = parts.platform;
    // New API return platform with urn:li:dataPlatform:, in order to
    // make it compatible, we manually remove that part.
    const platformName = platform?.replace('urn:li:dataPlatform:', '');

    return platformName as DatasetPlatform | undefined;
  }

  /**
   * Reference to the data entity's native name, should not be something that is editable but gives us a
   * more human readable form for the dataset vs the urn.
   * Falls back to the prefix parsed from the urn, and finally to the urn itself, when the entity has no name
   */
  get name(): string {
    return this.entity?.name || getDatasetUrnParts(this.urn).prefix || this.urn;
  }

  /**
   * Reference to the constructed data platform once it is fetched from api
   */
  currentDataPlatform?: IDataPlatform;

  /**
   * Class properties common across instances
   * Dictates how visual ui components should be rendered
   * Implemented as a getter to ensure that reads are idempotent
   * @readonly
   * @static
   * @type {IEntityRenderProps}
   */
  static get renderProps(): IEntityRenderProps {
    return getRenderProps();
  }

  /**
   * Resolves with whether or not the dataset contains personally identifiable information.
   * This implementation performs no lookup and always resolves to false
   */
  readPiiStatus(): Promise<boolean> {
    return Promise.resolve(false);
  }

  /**
   * Is a promise that retrieves the schema information and returns and sets the schema information in a
   * thennable format
   */
  async readSchema(): Promise<DatasetSchema> {
    const schema = await readDatasetSchema(this.urn);
    const datasetSchema = new DatasetSchema(schema);

    set(this, 'schema', datasetSchema);
    return datasetSchema;
  }

  /**
   * Asynchronously retrieves the upstream datasets for this dataset and assigns to the upstreams property
   */
  async readUpstreams(): Promise<Array<DatasetLineage>> {
    const lineageList = await readUpstreamDatasets(this.urn).catch(
      // Handling for an expected possibility of receiving a 404 error for this upstream dataset response, which
      // would require us to replace error with just an empty list
      getDefaultIfNotFoundError([] as DatasetLineageList)
    );
    const upstreams = lineageList.map((lineage): DatasetLineage => new DatasetLineage(lineage));

    set(this, 'upstreams', upstreams);
    return upstreams;
  }

  /**
   * Processes a save request to save the suggestion feedback given by the user, or inferred from the user action.
   * Since this is a background task, we do not await for a response from the server
   * @param suggestion - suggestion for which we are saving the feedback
   * @param feedback - whether the user has accepted or rejected the suggestion as accurate
   */
  saveSuggestionFeedback(suggestion: DatasetComplianceSuggestion, feedback: SuggestionIntent): void {
    const { uid } = suggestion;
    saveDatasetComplianceSuggestionFeedbackByUrn(this.urn, uid, feedback);
  }

  /**
   * TODO META-11674
   * Interim method until IDatasetEntity is removed.
   * Exposes the underlying entity typed as the legacy IDatasetEntity, or nothing when no entity is set
   */
  get legacyDataset(): IDatasetEntity | void {
    if (this.entity) {
      return (this.entity as unknown) as IDatasetEntity;
    }
  }

  // TODO META-12149 this should be part of an Aspect. This fns can't live under BaseEntity as
  // then we would have a circular dependency:
  // BaseEntity -> InstitutionalMemory -> PersonEntity -> BaseEntity
  readInstitutionalMemory = readInstitutionalMemory;
  writeInstitutionalMemory = writeInstitutionalMemory;

  /**
   * @param urn - urn that identifies this dataset
   * @param entityData - optional, already available entity data to set on the instance
   */
  constructor(readonly urn: string, entityData?: Com.Linkedin.Dataset.Dataset) {
    super(urn);
    // Sometimes we do not need readEntity to get this information as it was already provided by another entity
    // and we can just instantiate the class with it
    if (entityData) {
      set(this, 'entity', entityData);
    }
  }

  /**
   * Relates to the people entities that own this dataset, keyed on `ownerUrns`
   * NOTE(review): `ownerUrns` is not declared in this class, presumably it is provided by BaseEntity - confirm
   */
  @relationship('people', 'ownerUrns')
  datasetOwners?: Array<PersonEntity>;
}