mirror of
https://github.com/datahub-project/datahub.git
synced 2025-10-11 08:54:00 +00:00
feat(sdk): structured properties - add support for listing (#12283)
This commit is contained in:
parent
42b2cd3e7d
commit
45450f19a0
@ -6,7 +6,7 @@ import TabItem from '@theme/TabItem';
|
||||
## Why Would You Use Structured Properties?
|
||||
|
||||
Structured properties are a structured, named set of properties that can be attached to logical entities like Datasets, DataJobs, etc.
|
||||
Structured properties have values that are types. Conceptually, they are like “field definitions”.
|
||||
Structured properties have values that are typed and support constraints.
|
||||
|
||||
Learn more about structured properties in the [Structured Properties Feature Guide](../../../docs/features/feature-guides/properties/overview.md).
|
||||
|
||||
@ -15,6 +15,7 @@ Learn more about structured properties in the [Structured Properties Feature Gui
|
||||
|
||||
This guide will show you how to execute the following actions with structured properties.
|
||||
- Create structured properties
|
||||
- List structured properties
|
||||
- Read structured properties
|
||||
- Delete structured properties
|
||||
- Add structured properties to a dataset
|
||||
@ -32,7 +33,8 @@ Additionally, you need to have the following tools installed according to the me
|
||||
<Tabs>
|
||||
<TabItem value="CLI" label="CLI" default>
|
||||
|
||||
Install the relevant CLI version. Forms are available as of CLI version `0.13.1`. The corresponding DataHub Cloud release version is `v0.2.16.5`
|
||||
Install the relevant CLI version.
|
||||
Structured Properties were introduced in version `0.13.1`, but we continuously improve and add new functionality, so you should always [upgrade](https://datahubproject.io/docs/cli/#installation) to the latest CLI for best results.
|
||||
Connect to your instance via [init](https://datahubproject.io/docs/cli/#init):
|
||||
|
||||
- Run `datahub init` to update the instance you want to load into.
|
||||
@ -56,33 +58,8 @@ Requirements for OpenAPI are:
|
||||
The following code will create a structured property `io.acryl.privacy.retentionTime`.
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="graphql" label="graphQL" default>
|
||||
|
||||
```graphql
|
||||
mutation createStructuredProperty {
|
||||
createStructuredProperty(
|
||||
input: {
|
||||
id: "retentionTime",
|
||||
qualifiedName:"retentionTime",
|
||||
displayName: "Retention Time",
|
||||
description: "Retention Time is used to figure out how long to retain records in a dataset",
|
||||
valueType: "urn:li:dataType:datahub.number",
|
||||
allowedValues: [
|
||||
{numberValue: 30, description: "30 days, usually reserved for datasets that are ephemeral and contain pii"},
|
||||
{numberValue: 90, description:"description: Use this for datasets that drive monthly reporting but contain pii"},
|
||||
{numberValue: 365, description:"Use this for non-sensitive data that can be retained for longer"}
|
||||
],
|
||||
cardinality: SINGLE,
|
||||
entityTypes: ["urn:li:entityType:datahub.dataset", "urn:li:entityType:datahub.dataFlow"],
|
||||
}
|
||||
) {
|
||||
urn
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="CLI" label="CLI">
|
||||
<TabItem value="CLI" label="CLI" default>
|
||||
|
||||
Create a yaml file representing the properties you’d like to load.
|
||||
For example, the file below represents a property `io.acryl.privacy.retentionTime`. You can see the full example [here](https://github.com/datahub-project/datahub/blob/example-yaml-sp/metadata-ingestion/examples/structured_properties/struct_props.yaml).
|
||||
@ -108,13 +85,41 @@ For example, below file represents a property `io.acryl.privacy.retentionTime`.
|
||||
```
|
||||
|
||||
Use the CLI to create your properties:
|
||||
```commandline
|
||||
```shell
|
||||
datahub properties upsert -f {properties_yaml}
|
||||
```
|
||||
|
||||
If successful, you should see `Created structured property urn:li:structuredProperty:...`
|
||||
|
||||
</TabItem>
|
||||
|
||||
<TabItem value="Graphql" label="GraphQL">
|
||||
|
||||
```graphql
|
||||
mutation createStructuredProperty {
|
||||
createStructuredProperty(
|
||||
input: {
|
||||
id: "retentionTime",
|
||||
qualifiedName:"retentionTime",
|
||||
displayName: "Retention Time",
|
||||
description: "Retention Time is used to figure out how long to retain records in a dataset",
|
||||
valueType: "urn:li:dataType:datahub.number",
|
||||
allowedValues: [
|
||||
{numberValue: 30, description: "30 days, usually reserved for datasets that are ephemeral and contain pii"},
|
||||
{numberValue: 90, description:"Use this for datasets that drive monthly reporting but contain pii"},
|
||||
{numberValue: 365, description:"Use this for non-sensitive data that can be retained for longer"}
|
||||
],
|
||||
cardinality: SINGLE,
|
||||
entityTypes: ["urn:li:entityType:datahub.dataset", "urn:li:entityType:datahub.dataFlow"],
|
||||
}
|
||||
) {
|
||||
urn
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
|
||||
<TabItem value="OpenAPI v2" label="OpenAPI v2">
|
||||
|
||||
```shell
|
||||
@ -236,9 +241,182 @@ Example Response:
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
## Read Structured Properties
|
||||
## List Structured Properties
|
||||
|
||||
You can see the properties you created by running the following command:
|
||||
You can list all structured properties in your DataHub instance using the following methods:
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="CLI" label="CLI" default>
|
||||
|
||||
```shell
|
||||
datahub properties list
|
||||
```
|
||||
|
||||
This will show all properties with their full details.
|
||||
|
||||
Example Response:
|
||||
```json
|
||||
{
|
||||
"urn": "urn:li:structuredProperty:clusterName",
|
||||
"qualified_name": "clusterName",
|
||||
"type": "urn:li:dataType:datahub.string",
|
||||
"description": "Test Cluster Name Property",
|
||||
"display_name": "Cluster's name",
|
||||
"entity_types": [
|
||||
"urn:li:entityType:datahub.dataset"
|
||||
],
|
||||
"cardinality": "SINGLE"
|
||||
}
|
||||
{
|
||||
"urn": "urn:li:structuredProperty:projectNames",
|
||||
"qualified_name": "projectNames",
|
||||
"type": "urn:li:dataType:datahub.string",
|
||||
"description": "Test property for project name",
|
||||
"display_name": "Project Name",
|
||||
"entity_types": [
|
||||
"urn:li:entityType:datahub.dataset",
|
||||
"urn:li:entityType:datahub.dataFlow"
|
||||
],
|
||||
"cardinality": "MULTIPLE",
|
||||
"allowed_values": [
|
||||
{
|
||||
"value": "Tracking",
|
||||
"description": "test value 1 for project"
|
||||
},
|
||||
{
|
||||
"value": "DataHub",
|
||||
"description": "test value 2 for project"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
If you only want to see the URNs, you can use:
|
||||
|
||||
```shell
|
||||
datahub properties list --no-details
|
||||
```
|
||||
|
||||
Example Response:
|
||||
```
|
||||
[2025-01-08 22:23:00,625] INFO {datahub.cli.specific.structuredproperties_cli:134} - Listing structured property urns only, use --details for more information
|
||||
urn:li:structuredProperty:clusterName
|
||||
urn:li:structuredProperty:clusterType
|
||||
urn:li:structuredProperty:io.acryl.dataManagement.deprecationDate
|
||||
urn:li:structuredProperty:projectNames
|
||||
```
|
||||
|
||||
To download all the structured property definitions into a single file that you can use with the `upsert` command as described in the [create section](#create-structured-properties), you can run the list command with the `--to-file` option.
|
||||
|
||||
```shell
|
||||
datahub properties list --to-file structured_properties.yaml
|
||||
```
|
||||
|
||||
Example Response:
|
||||
```yaml
|
||||
- urn: urn:li:structuredProperty:clusterName
|
||||
qualified_name: clusterName
|
||||
type: urn:li:dataType:datahub.string
|
||||
description: Test Cluster Name Property
|
||||
display_name: Cluster's name
|
||||
entity_types:
|
||||
- urn:li:entityType:datahub.dataset
|
||||
cardinality: SINGLE
|
||||
- urn: urn:li:structuredProperty:clusterType
|
||||
qualified_name: clusterType
|
||||
type: urn:li:dataType:datahub.string
|
||||
description: Test Cluster Type Property
|
||||
display_name: Cluster's type
|
||||
entity_types:
|
||||
- urn:li:entityType:datahub.dataset
|
||||
cardinality: SINGLE
|
||||
- urn: urn:li:structuredProperty:io.acryl.dataManagement.deprecationDate
|
||||
qualified_name: io.acryl.dataManagement.deprecationDate
|
||||
type: urn:li:dataType:datahub.date
|
||||
display_name: Deprecation Date
|
||||
entity_types:
|
||||
- urn:li:entityType:datahub.dataset
|
||||
- urn:li:entityType:datahub.dataFlow
|
||||
- urn:li:entityType:datahub.dataJob
|
||||
- urn:li:entityType:datahub.schemaField
|
||||
cardinality: SINGLE
|
||||
- urn: urn:li:structuredProperty:io.acryl.privacy.enumProperty5712
|
||||
qualified_name: io.acryl.privacy.enumProperty5712
|
||||
type: urn:li:dataType:datahub.string
|
||||
description: The retention policy for the dataset
|
||||
entity_types:
|
||||
- urn:li:entityType:datahub.dataset
|
||||
cardinality: MULTIPLE
|
||||
allowed_values:
|
||||
- value: foo
|
||||
- value: bar
|
||||
... etc.
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
|
||||
<TabItem value="OpenAPI v3" label="OpenAPI v3">
|
||||
|
||||
Example Request:
|
||||
```bash
|
||||
curl -X 'GET' \
|
||||
'http://localhost:9002/openapi/v3/entity/structuredproperty?systemMetadata=false&includeSoftDelete=false&skipCache=false&aspects=structuredPropertySettings&aspects=propertyDefinition&aspects=institutionalMemory&aspects=structuredPropertyKey&aspects=status&count=10&sortCriteria=urn&sortOrder=ASCENDING&query=*' \
|
||||
-H 'accept: application/json'
|
||||
```
|
||||
|
||||
Example Response:
|
||||
```json
|
||||
{
|
||||
"scrollId": "...",
|
||||
"entities": [
|
||||
{
|
||||
"urn": "urn:li:structuredProperty:clusterName",
|
||||
"propertyDefinition": {
|
||||
"value": {
|
||||
"immutable": false,
|
||||
"qualifiedName": "clusterName",
|
||||
"displayName": "Cluster's name",
|
||||
"valueType": "urn:li:dataType:datahub.string",
|
||||
"description": "Test Cluster Name Property",
|
||||
"entityTypes": [
|
||||
"urn:li:entityType:datahub.dataset"
|
||||
],
|
||||
"cardinality": "SINGLE"
|
||||
}
|
||||
},
|
||||
"structuredPropertyKey": {
|
||||
"value": {
|
||||
"id": "clusterName"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Key Query Parameters:
|
||||
- `count`: Number of results to return per page (default: 10)
|
||||
- `sortCriteria`: Field to sort by (default: urn)
|
||||
- `sortOrder`: Sort order (ASCENDING or DESCENDING)
|
||||
- `query`: Search query to filter properties (* for all)
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
The list endpoint returns all structured properties in your DataHub instance. Each property includes:
|
||||
- URN: Unique identifier for the property
|
||||
- Qualified Name: The property's qualified name
|
||||
- Type: The data type of the property (string, number, date, etc.)
|
||||
- Description: A description of the property's purpose
|
||||
- Display Name: Human-readable name for the property
|
||||
- Entity Types: The types of entities this property can be applied to
|
||||
- Cardinality: Whether the property accepts single (SINGLE) or multiple (MULTIPLE) values
|
||||
- Allowed Values: If specified, the list of allowed values for this property
|
||||
|
||||
## Read a single Structured Property
|
||||
|
||||
You can read an individual property you created by running the following command:
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="CLI" label="CLI" default>
|
||||
@ -279,6 +457,91 @@ If successful, you should see metadata about your properties returned.
|
||||
}
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="GraphQL" label="GraphQL">
|
||||
|
||||
Example Request:
|
||||
```graphql
|
||||
query {
|
||||
structuredProperty(urn: "urn:li:structuredProperty:projectNames") {
|
||||
urn
|
||||
type
|
||||
definition {
|
||||
qualifiedName
|
||||
displayName
|
||||
description
|
||||
cardinality
|
||||
allowedValues {
|
||||
value {
|
||||
... on StringValue {
|
||||
stringValue
|
||||
}
|
||||
... on NumberValue {
|
||||
numberValue
|
||||
}
|
||||
}
|
||||
description
|
||||
}
|
||||
entityTypes {
|
||||
urn
|
||||
info {
|
||||
type
|
||||
qualifiedName
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Example Response:
|
||||
```json
|
||||
{
|
||||
"data": {
|
||||
"structuredProperty": {
|
||||
"urn": "urn:li:structuredProperty:projectNames",
|
||||
"type": "STRUCTURED_PROPERTY",
|
||||
"definition": {
|
||||
"qualifiedName": "projectNames",
|
||||
"displayName": "Project Name",
|
||||
"description": "Test property for project name",
|
||||
"cardinality": "MULTIPLE",
|
||||
"allowedValues": [
|
||||
{
|
||||
"value": {
|
||||
"stringValue": "Tracking"
|
||||
},
|
||||
"description": "test value 1 for project"
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
"stringValue": "DataHub"
|
||||
},
|
||||
"description": "test value 2 for project"
|
||||
}
|
||||
],
|
||||
"entityTypes": [
|
||||
{
|
||||
"urn": "urn:li:entityType:datahub.dataset",
|
||||
"info": {
|
||||
"type": "DATASET",
|
||||
"qualifiedName": "datahub.dataset"
|
||||
}
|
||||
},
|
||||
{
|
||||
"urn": "urn:li:entityType:datahub.dataFlow",
|
||||
"info": {
|
||||
"type": "DATA_FLOW",
|
||||
"qualifiedName": "datahub.dataFlow"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"extensions": {}
|
||||
}
|
||||
```
|
||||
</TabItem>
|
||||
|
||||
<TabItem value="OpenAPI v2" label="OpenAPI v2">
|
||||
@ -389,7 +652,7 @@ Example Response:
|
||||
This action will set/replace all structured properties on the entity. See PATCH operations to add/remove a single property.
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="graphQL" label="GraphQL" default>
|
||||
<TabItem value="GraphQL" label="GraphQL" default>
|
||||
|
||||
```graphql
|
||||
mutation upsertStructuredProperties {
|
||||
@ -537,7 +800,7 @@ datahub dataset get --urn {urn}
|
||||
For reading all structured properties from a dataset:
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="graphql" label="GraphQL" default>
|
||||
<TabItem value="Graphql" label="GraphQL" default>
|
||||
|
||||
```graphql
|
||||
query getDataset {
|
||||
|
@ -0,0 +1,12 @@
|
||||
# Usage: python3 list_structured_properties.py
|
||||
# Expected Output: List of structured properties
|
||||
# This script lists all structured properties in DataHub
|
||||
from datahub.api.entities.structuredproperties.structuredproperties import (
|
||||
StructuredProperties,
|
||||
)
|
||||
from datahub.ingestion.graph.client import get_default_graph
|
||||
|
||||
# Connect with the default DataHub client configuration and print each
# structured property as a plain dictionary, one per line.
with get_default_graph() as graph:
    for prop in StructuredProperties.list(graph):
        print(prop.dict())
|
@ -1,7 +1,7 @@
|
||||
import logging
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
from typing import Iterable, List, Optional
|
||||
|
||||
import yaml
|
||||
from pydantic import validator
|
||||
@ -226,3 +226,14 @@ class StructuredProperties(ConfigModel):
|
||||
yaml.indent(mapping=2, sequence=4, offset=2)
|
||||
yaml.default_flow_style = False
|
||||
yaml.dump(self.dict(), fp)
|
||||
|
||||
@staticmethod
def list_urns(graph: DataHubGraph) -> Iterable[str]:
    """Return the URNs of the structured properties found through ``graph``.

    The lookup is delegated to ``graph.get_urns_by_filter`` restricted to
    the ``structuredProperty`` entity type.
    """
    entity_types = ["structuredProperty"]
    return graph.get_urns_by_filter(entity_types=entity_types)
|
||||
|
||||
@staticmethod
def list(graph: DataHubGraph) -> Iterable["StructuredProperties"]:
    """Yield one ``StructuredProperties`` object per URN from ``list_urns``.

    This is a generator: URNs are resolved through ``from_datahub`` one at
    a time as the caller iterates, not up front.
    """
    urns = StructuredProperties.list_urns(graph)
    yield from (StructuredProperties.from_datahub(graph, urn) for urn in urns)
|
||||
|
@ -1,9 +1,11 @@
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
import click
|
||||
from click_default_group import DefaultGroup
|
||||
from ruamel.yaml import YAML
|
||||
|
||||
from datahub.api.entities.structuredproperties.structuredproperties import (
|
||||
StructuredProperties,
|
||||
@ -61,3 +63,85 @@ def get(urn: str, to_file: str) -> None:
|
||||
)
|
||||
else:
|
||||
click.secho(f"Structured property {urn} does not exist")
|
||||
|
||||
|
||||
@properties.command(
    name="list",
)
@click.option("--details/--no-details", is_flag=True, default=True)
@click.option("--to-file", required=False, type=str)
@telemetry.with_telemetry()
def list(details: bool, to_file: str) -> None:
    """List structured properties in DataHub"""
    # With --details (the default) each property is printed as JSON, or merged
    # into the YAML file given by --to-file. With --no-details only the urns
    # are printed, or written one per line to --to-file.

    def to_yaml_list(
        objects: Iterable[StructuredProperties],  # iterable of objects to dump
        file: Path,
    ) -> None:
        """Write ``objects`` to ``file`` as a YAML list, merging with any existing content.

        If ``file`` already exists, its entries keep their position: an entry
        whose urn matches one of ``objects`` is replaced by the fresh
        definition, any other entry is written back as it was loaded. Objects
        whose urn is not yet in the file are appended at the end.
        """
        yaml = YAML(typ="rt")  # default, if not specified, is 'rt' (round-trip)
        yaml.indent(mapping=2, sequence=4, offset=2)
        yaml.default_flow_style = False

        # Serialise every incoming object once, keyed by urn, so the merge
        # below is a dictionary lookup instead of a rescan per existing entry.
        # setdefault keeps the first definition if a urn shows up twice.
        fresh_by_urn: dict = {}
        for obj in objects:
            fresh_by_urn.setdefault(
                obj.urn, obj.dict(exclude_unset=True, exclude_none=True)
            )

        serialized_objects = []
        existing_urns = set()
        if file.exists():
            with open(file, "r") as fp:
                # An empty file loads as None; treat it as "no entries".
                existing_entries = yaml.load(fp) or []
            for entry in existing_entries:
                # Parse only to derive the urn (entries may carry just an id).
                existing_urn = StructuredProperties.parse_obj(entry).urn
                existing_urns.add(existing_urn)
                # Entries unknown to DataHub are kept as the loaded mapping;
                # handing the parsed model object to the dumper would fail.
                serialized_objects.append(fresh_by_urn.get(existing_urn, entry))

        serialized_objects.extend(
            definition
            for urn, definition in fresh_by_urn.items()
            if urn not in existing_urns
        )

        with open(file, "w") as fp:
            yaml.dump(serialized_objects, fp)

    with get_default_graph() as graph:
        if details:
            logger.info(
                "Listing structured properties with details. Use --no-details for urns only"
            )
            structuredproperties = StructuredProperties.list(graph)
            if to_file:
                to_yaml_list(structuredproperties, Path(to_file))
            else:
                for structuredproperty in structuredproperties:
                    click.secho(
                        json.dumps(
                            structuredproperty.dict(
                                exclude_unset=True, exclude_none=True
                            ),
                            indent=2,
                        )
                    )
        else:
            logger.info(
                "Listing structured property urns only, use --details for more information"
            )
            structured_property_urns = StructuredProperties.list_urns(graph)
            if to_file:
                with open(to_file, "w") as f:
                    for urn in structured_property_urns:
                        f.write(f"{urn}\n")
                click.secho(
                    f"Structured property urns written to {to_file}", fg="green"
                )
            else:
                for urn in structured_property_urns:
                    click.secho(f"{urn}")
|
||||
|
@ -0,0 +1,213 @@
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
from datahub.api.entities.structuredproperties.structuredproperties import (
|
||||
AllowedValue,
|
||||
StructuredProperties,
|
||||
TypeQualifierAllowedTypes,
|
||||
)
|
||||
from datahub.ingestion.graph.client import DataHubGraph
|
||||
from datahub.metadata.schema_classes import (
|
||||
PropertyValueClass,
|
||||
StructuredPropertyDefinitionClass,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def sample_yaml_content():
    """Return a YAML document describing a single structured property.

    The document is a one-element list; the nested keys (``entity_types``,
    ``allowed_values``) must stay indented under the list item so the whole
    mapping parses as one property definition.
    """
    return """
- id: test_property
  type: string
  description: Test description
  display_name: Test Property
  entity_types:
    - dataset
  cardinality: SINGLE
  allowed_values:
    - value: test_value
      description: Test value description
"""
|
||||
|
||||
|
||||
@pytest.fixture
def sample_yaml_file(tmp_path, sample_yaml_content):
    """Write the sample YAML under ``tmp_path`` and return the file path as a string."""
    target = tmp_path / "test_properties.yaml"
    target.write_text(sample_yaml_content)
    return str(target)
|
||||
|
||||
|
||||
@pytest.fixture
def mock_graph():
    """Provide a ``Mock`` constrained to the ``DataHubGraph`` interface."""
    graph_double = Mock(spec=DataHubGraph)
    return graph_double
|
||||
|
||||
|
||||
def test_structured_properties_basic_creation():
    """A minimal definition keeps its id/description and exposes the derived type and urn."""
    created = StructuredProperties(
        id="test_prop", type="string", description="Test description"
    )

    # Fields passed through unchanged
    assert created.id == "test_prop"
    assert created.description == "Test description"

    # Fields whose stored value differs from the short input
    assert created.type == "urn:li:dataType:datahub.string"
    assert created.urn == "urn:li:structuredProperty:test_prop"
|
||||
|
||||
|
||||
def test_structured_properties_validate_type():
    """``string`` is accepted and stored as a data-type urn; an unknown type raises."""
    # Accepted type
    accepted = StructuredProperties(id="test", type="string")
    assert accepted.type == "urn:li:dataType:datahub.string"

    # Rejected type
    with pytest.raises(ValueError, match="Type .* is not allowed"):
        StructuredProperties(id="test", type="invalid_type")
|
||||
|
||||
|
||||
def test_structured_properties_validate_entity_types():
    """``dataset`` is accepted and stored as an entity-type urn; an unknown entity type raises."""
    # Accepted entity type
    accepted = StructuredProperties(id="test", type="string", entity_types=["dataset"])
    stored_types = accepted.entity_types
    assert stored_types
    assert "urn:li:entityType:datahub.dataset" in stored_types

    # Rejected entity type
    with pytest.raises(ValueError, match="not a valid entity type"):
        StructuredProperties(id="test", type="string", entity_types=["invalid_entity"])
|
||||
|
||||
|
||||
def test_structured_properties_from_yaml(sample_yaml_file):
    """Loading the sample file yields exactly one property with the expected fields."""
    loaded = StructuredProperties.from_yaml(sample_yaml_file)
    assert len(loaded) == 1

    only = loaded[0]
    assert only.id == "test_property"
    assert only.type == "urn:li:dataType:datahub.string"
    assert only.description == "Test description"
    assert only.display_name
    assert only.display_name == "Test Property"

    allowed = only.allowed_values
    assert allowed
    assert len(allowed) == 1
    assert allowed[0].value == "test_value"
|
||||
|
||||
|
||||
def test_structured_properties_generate_mcps():
    """``generate_mcps`` emits a single MCP carrying the property definition aspect."""
    allowed = AllowedValue(value="test_value", description="Test value description")
    definition = StructuredProperties(
        id="test_prop",
        type="string",
        description="Test description",
        display_name="Test Property",
        entity_types=["dataset"],
        allowed_values=[allowed],
    )

    emitted = definition.generate_mcps()
    assert len(emitted) == 1
    (only,) = emitted

    assert only.entityUrn == "urn:li:structuredProperty:test_prop"

    aspect = only.aspect
    assert isinstance(aspect, StructuredPropertyDefinitionClass)
    assert aspect.valueType == "urn:li:dataType:datahub.string"
    assert aspect.description == "Test description"
    assert aspect.allowedValues
    assert len(aspect.allowedValues) == 1
    assert aspect.allowedValues[0].value == "test_value"
|
||||
|
||||
|
||||
def test_structured_properties_from_datahub(mock_graph):
    """``from_datahub`` maps the aspect returned by the graph onto the model fields."""
    # The mocked graph hands back this definition for any get_aspect call.
    mock_graph.get_aspect.return_value = StructuredPropertyDefinitionClass(
        qualifiedName="test_prop",
        valueType="urn:li:dataType:datahub.string",
        displayName="Test Property",
        description="Test description",
        entityTypes=["urn:li:entityType:datahub.dataset"],
        cardinality="SINGLE",
        allowedValues=[
            PropertyValueClass(value="test_value", description="Test description")
        ],
    )

    fetched = StructuredProperties.from_datahub(
        mock_graph, "urn:li:structuredProperty:test_prop"
    )

    assert fetched.qualified_name == "test_prop"
    assert fetched.type == "urn:li:dataType:datahub.string"
    assert fetched.display_name == "Test Property"

    allowed = fetched.allowed_values
    assert allowed
    assert len(allowed) == 1
    assert allowed[0].value == "test_value"
|
||||
|
||||
|
||||
def test_structured_properties_to_yaml(tmp_path):
    """``to_yaml`` creates the target file and the dumped mapping round-trips its key fields."""
    definition = StructuredProperties(
        id="test_prop",
        type="string",
        description="Test description",
        allowed_values=[
            AllowedValue(value="test_value", description="Test value description")
        ],
    )

    destination = tmp_path / "output.yaml"
    definition.to_yaml(destination)

    # The file must exist and parse back into the expected top-level keys.
    assert destination.exists()
    with open(destination) as handle:
        dumped = yaml.safe_load(handle)

    assert dumped["id"] == "test_prop"
    assert dumped["type"] == "urn:li:dataType:datahub.string"
    assert dumped["description"] == "Test description"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "input_type,expected_type",
    [
        ("string", "urn:li:dataType:datahub.string"),
        ("STRING", "urn:li:dataType:datahub.string"),
        ("number", "urn:li:dataType:datahub.number"),
        ("date", "urn:li:dataType:datahub.date"),
    ],
)
def test_structured_properties_type_normalization(input_type, expected_type):
    """Each short type name, in either case, is stored as the matching data-type urn."""
    normalized = StructuredProperties(id="test_prop", type=input_type).type
    assert normalized == expected_type
|
||||
|
||||
|
||||
def test_structured_properties_type_qualifier():
    """Allowed types in the type qualifier reach the emitted aspect as entity-type urns."""
    qualifier = TypeQualifierAllowedTypes(allowed_types=["dataset"])
    definition = StructuredProperties(
        id="test_prop",
        type="urn",
        type_qualifier=qualifier,
    )

    first_aspect = definition.generate_mcps()[0].aspect
    assert first_aspect
    assert first_aspect.typeQualifier["allowedTypes"] == [  # type: ignore
        "urn:li:entityType:datahub.dataset"
    ]
|
||||
|
||||
|
||||
def test_structured_properties_list(mock_graph):
    """``StructuredProperties.list`` yields one model per urn returned by the graph filter."""
    urns = [
        "urn:li:structuredProperty:prop1",
        "urn:li:structuredProperty:prop2",
    ]
    mock_graph.get_urns_by_filter.return_value = urns

    # Every get_aspect call returns the same definition.
    mock_graph.get_aspect.return_value = StructuredPropertyDefinitionClass(
        qualifiedName="test_prop",
        valueType="urn:li:dataType:string",
        entityTypes=["urn:li:entityType:datahub.dataset"],
    )

    listed = [prop for prop in StructuredProperties.list(mock_graph)]

    # Verify get_urns_by_filter was called with correct arguments
    mock_graph.get_urns_by_filter.assert_called_once_with(
        entity_types=["structuredProperty"]
    )

    assert len(listed) == 2
    for prop in listed:
        assert isinstance(prop, StructuredProperties)
|
@ -839,3 +839,49 @@ def test_dataset_structured_property_delete(ingest_cleanup_data, graph_client, c
|
||||
# Validate search works for property #1 & #2
|
||||
validate_search(property1.qualified_name, expected=[])
|
||||
validate_search(property2.qualified_name, expected=[dataset_urns[0]])
|
||||
|
||||
|
||||
def test_structured_properties_list(ingest_cleanup_data, graph_client, caplog):
    """Two freshly created properties appear in ``list_urns`` and round-trip through ``list``."""

    def create_property():
        # Register a string, single-cardinality property under a randomised
        # name, then read it back through from_datahub.
        name = f"listTest{randint(10, 10000)}Property"
        urn = f"urn:li:structuredProperty:{default_namespace}.{name}"

        create_property_definition(
            property_name=name,
            graph=graph_client,
            value_type="string",
            cardinality="SINGLE",
        )

        fetched = StructuredProperties.from_datahub(graph=graph_client, urn=urn)
        assert fetched is not None
        return fetched

    # create 2 structured properties
    first = create_property()
    second = create_property()
    wait_for_writes_to_sync()

    # validate that urns are in the list
    listed_urns = [*StructuredProperties.list_urns(graph_client)]
    assert first.urn in listed_urns
    assert second.urn in listed_urns

    # list structured properties (full)
    wanted_urns = [first.urn, second.urn]
    matched = [
        prop
        for prop in StructuredProperties.list(graph_client)
        if prop.urn in wanted_urns
    ]
    assert len(matched) == 2

    listed_first = next(prop for prop in matched if prop.urn == first.urn)
    listed_second = next(prop for prop in matched if prop.urn == second.urn)

    assert first.dict() == listed_first.dict()
    assert second.dict() == listed_second.dict()
|
||||
|
Loading…
x
Reference in New Issue
Block a user