Fix #4768 - Add Location path (#4770)

Co-authored-by: Sriharsha Chintalapani <harsha@getcollate.io>
This commit is contained in:
Pere Miquel Brull 2022-05-08 06:10:13 +02:00 committed by GitHub
parent 980c27f831
commit 5700f6d68e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 66 additions and 33 deletions

View File

@ -42,9 +42,9 @@ import org.openmetadata.catalog.util.ResultList;
public class LocationRepository extends EntityRepository<Location> {
// Location fields that can be patched in a PATCH request
private static final String LOCATION_PATCH_FIELDS = "owner,tags";
private static final String LOCATION_PATCH_FIELDS = "owner,tags,path";
// Location fields that can be updated in a PUT request
private static final String LOCATION_UPDATE_FIELDS = "owner,tags";
private static final String LOCATION_UPDATE_FIELDS = "owner,tags,path";
public LocationRepository(CollectionDAO dao) {
super(
@ -60,6 +60,7 @@ public class LocationRepository extends EntityRepository<Location> {
@Override
public Location setFields(Location location, Fields fields) throws IOException {
location.setService(getService(location));
location.setPath(location.getPath());
location.setOwner(fields.contains(FIELD_OWNER) ? getOwner(location) : null);
location.setFollowers(fields.contains(FIELD_FOLLOWERS) ? getFollowers(location) : null);
location.setTags(fields.contains(FIELD_TAGS) ? getTags(location.getFullyQualifiedName()) : null);
@ -379,6 +380,7 @@ public class LocationRepository extends EntityRepository<Location> {
@Override
public void entitySpecificUpdate() throws IOException {
recordChange("locationType", original.getEntity().getLocationType(), updated.getEntity().getLocationType());
recordChange("path", original.getEntity().getPath(), updated.getEntity().getPath());
}
}
}

View File

@ -93,7 +93,7 @@ public class LocationResource extends EntityResource<Location, LocationRepositor
}
}
static final String FIELDS = "owner,followers,tags";
static final String FIELDS = "owner,followers,tags,path";
@GET
@Operation(
@ -448,6 +448,7 @@ public class LocationResource extends EntityResource<Location, LocationRepositor
return new Location()
.withId(UUID.randomUUID())
.withName(create.getName())
.withPath(create.getPath())
.withDescription(create.getDescription())
.withService(create.getService())
.withLocationType(create.getLocationType())

View File

@ -7,7 +7,11 @@
"properties": {
"name": {
"description": "Name that identifies this Location.",
"$ref": "../../entity/data/location.json#/definitions/locationName"
"$ref": "../../type/basic.json#/definitions/entityName"
},
"path": {
"description": "Location full path.",
"type": "string"
},
"description": {
"description": "Description of the location instance.",

View File

@ -6,10 +6,6 @@
"type": "object",
"javaType": "org.openmetadata.catalog.entity.data.Location",
"definitions": {
"locationName": {
"description": "Local name (not fully qualified name) of a location.",
"$ref": "../../type/basic.json#/definitions/entityName"
},
"locationType": {
"javaType": "org.openmetadata.catalog.type.LocationType",
"description": "This schema defines the type used for describing different types of Location.",
@ -40,8 +36,12 @@
"$ref": "../../type/basic.json#/definitions/uuid"
},
"name": {
"description": "Name of a location without the service. For example s3://bucket/path1/path2.",
"$ref": "#/definitions/locationName"
"description": "Name of a location",
"$ref": "../../type/basic.json#/definitions/entityName"
},
"path": {
"description": "Location full path",
"type": "string"
},
"displayName": {
"description": "Display Name that identifies this table. It could be title or label from the source services.",

View File

@ -63,6 +63,7 @@ public class LocationResourceTest extends EntityResourceTest<Location, CreateLoc
public CreateLocation createRequest(String name, String description, String displayName, EntityReference owner) {
return new CreateLocation()
.withName(name)
.withPath(name)
.withService(getContainer())
.withDescription(description)
.withOwner(owner);
@ -81,7 +82,7 @@ public class LocationResourceTest extends EntityResourceTest<Location, CreateLoc
createRequest.getDescription(),
TestUtils.getPrincipal(authHeaders),
createRequest.getOwner());
assertEquals(createRequest.getPath(), location.getPath());
// Validate service
EntityReference expectedService = createRequest.getService();
if (expectedService != null) {
@ -172,7 +173,6 @@ public class LocationResourceTest extends EntityResourceTest<Location, CreateLoc
// Create location for each service and test APIs
for (EntityReference service : differentServices) {
createAndCheckEntity(createRequest(test).withService(service), ADMIN_AUTH_HEADERS);
// List locations by filtering on service name and ensure right locations are returned
Map<String, String> queryParams = new HashMap<>();
queryParams.put("service", service.getName());

View File

@ -1,20 +1,23 @@
{
"locations": [
{
"name": "s3://bucket-a",
"displayName": "s3://bucket-a",
"name": "bucket_a",
"path": "s3://bucket-a",
"displayName": "Bucket A",
"description": "Bucket A",
"locationType": "Bucket"
},
{
"name": "s3://bucket-b",
"displayName": "s3://bucket-b",
"name": "bucket_b",
"path": "s3://bucket-b",
"displayName": "Bucket B",
"description": "Bucket B",
"locationType": "Bucket"
},
{
"name": "s3://bucket-a/user/hive/dwh",
"displayName": "s3://bucket-a/user/hive/dwh",
"name": "hive_dwh",
"path": "s3://bucket-a/user/hive/dwh",
"displayName": "Hive DWH",
"description": "Bucket A prefix",
"locationType": "Prefix"
}

View File

@ -384,6 +384,7 @@ class MetadataRestSink(Sink[Entity]):
def _create_location(self, location: Location) -> Location:
location_request = CreateLocationRequest(
name=location.name,
path=location.path,
description=location.description,
locationType=location.locationType,
tags=location.tags,

View File

@ -89,12 +89,13 @@ class GcsSource(Source[Entity]):
try:
for bucket in self.gcs.list_buckets():
self.status.scanned(bucket.name)
location_name = self._get_bucket_name_with_prefix(bucket.name)
location_path = self._get_bucket_name_with_prefix(bucket.name)
location_id = uuid.uuid4()
location = Location(
id=location_id,
name=location_name,
displayName=location_name,
name=bucket.name,
path=location_path,
displayName=bucket.name,
locationType=LocationType.Bucket,
service=EntityReference(
id=self.service.id,

View File

@ -11,7 +11,7 @@
import traceback
import uuid
from typing import Iterable, List
from typing import Iterable, List, Optional
from metadata.generated.schema.entity.data.database import Database
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
@ -185,13 +185,7 @@ class GlueSource(Source[Entity]):
)
table_columns = self.get_columns(table["StorageDescriptor"])
location_entity = Location(
name=table["StorageDescriptor"]["Location"],
locationType=location_type,
service=EntityReference(
id=self.storage_service.id, type="storageService"
),
)
location_entity = self.get_table_location(table, location_type)
table_type: TableType = TableType.Regular
if location_type == LocationType.Iceberg:
@ -221,6 +215,30 @@ class GlueSource(Source[Entity]):
logger.debug(traceback.format_exc())
logger.error(err)
def get_table_location(
self, table: dict, location_type: LocationType
) -> Optional[Location]:
"""
Try to create the location or return None
:param table: Table dict from boto3
:param location_type: Table or Iceberg
:return: Location or None
"""
try:
return Location(
name=table["Name"][:128], # set location name as table name
path=table["StorageDescriptor"]["Location"],
locationType=location_type,
service=EntityReference(
id=self.storage_service.id, type="storageService"
),
)
except Exception as err:
logger.error(f"Cannot create location for {table['Name']} due to {err}")
logger.debug(traceback.format_exc())
return None
def get_downstream_tasks(self, task_unique_id, tasks):
downstream_tasks = []
for edges in tasks["Edges"]:

View File

@ -77,12 +77,13 @@ class S3Source(Source[Entity]):
for bucket in buckets_response["Buckets"]:
bucket_name = bucket["Name"]
self.status.scanned(bucket_name)
location_name = self._get_bucket_name_with_prefix(bucket_name)
location_path = self._get_bucket_name_with_prefix(bucket_name)
location_id = uuid.uuid4()
location = Location(
id=location_id,
name=location_name,
displayName=location_name,
name=bucket_name,
path=location_path,
displayName=bucket_name,
locationType=LocationType.Bucket,
service=EntityReference(
id=self.service.id,

View File

@ -377,6 +377,7 @@ class SampleDataSource(Source[Entity]):
location_ev = Location(
id=uuid.uuid4(),
name=location["name"],
path=location["path"],
displayName=location["displayName"],
description=location["description"],
locationType=location["locationType"],
@ -420,7 +421,8 @@ class SampleDataSource(Source[Entity]):
)
location_metadata = Location(
id=uuid.uuid4(),
name="s3://glue_bucket/dwh/schema/" + table["name"],
name=table["name"],
path="s3://glue_bucket/dwh/schema/" + table["name"],
description=table["description"],
locationType=location_type,
service=EntityReference(