[Issue-877] - High Level API (#890)

* First approach

* Test model

* Validate endpoints

* Update schema codegen

* Prepare reqs dev

* Rename titles and codegen

* Update README

* Keep lineage title as addLineage

* Update pydantic classes

* Update API wrapper & tests

* Handle services methods

* Prepare tests
This commit is contained in:
Pere Miquel Brull 2021-10-21 23:51:38 +02:00 committed by GitHub
parent 53eb2aad21
commit f91bcc03f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
93 changed files with 1113 additions and 134 deletions

View File

@ -13,6 +13,9 @@ install:
install_test:
pip install -r ingestion/requirements-test.txt
install_dev:
pip install -r ingestion/requirements-dev.txt
precommit_install:
@echo "Installing pre-commit hooks"
@echo "Make sure to first run `make install_test`"
@ -29,3 +32,6 @@ black:
black_check:
black --check --diff $(PY_SOURCE) --exclude $(PY_SOURCE)/metadata/generated
generate:
datamodel-codegen --input catalog-rest-service/src/main/resources/json --input-file-type jsonschema --output ingestion/src/metadata/generated

View File

@ -1,7 +1,7 @@
{
"$id": "https://github.com/open-metadata/OpenMetadata/blob/main/catalog-rest-service/src/main/resources/json/schema/api/data/createTopic.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Create topic",
"title": "Create topic entity request",
"description": "Create a topic entity request",
"type": "object",
"properties": {

View File

@ -1,7 +1,7 @@
{
"$id": "https://github.com/open-metadata/OpenMetadata/blob/main/catalog-rest-service/src/main/resources/json/schema/api/feed/createThread.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Create thread request",
"title": "Create thread entity request",
"description": "Create thread request",
"type": "object",

View File

@ -1,7 +1,7 @@
{
"$id": "https://github.com/open-metadata/OpenMetadata/blob/main/catalog-rest-service/src/main/resources/json/schema/api/tags/createTag.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Create tag API request",
"title": "Create tag entity request",
"description": "Create tag API request",
"type": "object",
"javaType": "org.openmetadata.catalog.type.CreateTag",

View File

@ -1,7 +1,7 @@
{
"$id": "https://github.com/open-metadata/OpenMetadata/blob/main/catalog-rest-service/src/main/resources/json/schema/api/tags/createTagCategory.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Create tag category request",
"title": "Create tag category entity request",
"description": "Create tag category request",
"type": "object",
"javaType": "org.openmetadata.catalog.type.CreateTagCategory",

View File

@ -1,7 +1,7 @@
{
"$id": "https://github.com/open-metadata/OpenMetadata/blob/main/catalog-rest-service/src/main/resources/json/schema/api/teams/createTeam.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Team entity",
"title": "Create team entity request",
"description": "Team entity",
"type": "object",

View File

@ -1,7 +1,7 @@
{
"$id": "https://github.com/open-metadata/OpenMetadata/blob/main/catalog-rest-service/src/main/resources/json/schema/api/teams/createUser.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Request to create User entity",
"title": "Create user entity request",
"description": "Request to create User entity",
"type": "object",

View File

@ -70,3 +70,9 @@ docker run -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" docker.elas
```text
metadata ingest -c ./pipelines/metadata_to_es.json
```
## Generated sources
We are using `datamodel-codegen` to get some `pydantic` classes inside the `generated` module from the JSON Schemas defining the API and Entities.
This tool bases the class name on the `title` of the JSON Schema (vs. Java POJO, which uses the file name). Note that this convention is important for us, as having a standardized approach in creating the titles helps us create generic code capable of tackling multiple Type Variables.

View File

@ -0,0 +1 @@
datamodel-code-generator==0.11.14

View File

@ -1,3 +1,3 @@
# generated by datamodel-codegen:
# filename: json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: data/tags/personalDataTags.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: data/tags/piiTags.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: data/tags/tierTags.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: data/tags/userTags.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,3 +1,3 @@
# generated by datamodel-codegen:
# filename: json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/catalogVersion.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,3 +1,3 @@
# generated by datamodel-codegen:
# filename: json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/data/createChart.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/data/createDashboard.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/data/createDatabase.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/data/createModel.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/data/createPipeline.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/data/createTable.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/data/createTask.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/data/createTopic.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -12,7 +12,7 @@ from ...entity.data import topic
from ...type import entityReference, tagLabel
class CreateTopic(BaseModel):
class CreateTopicEntityRequest(BaseModel):
name: topic.TopicName = Field(
..., description='Name that identifies this topic instance uniquely.'
)

View File

@ -1,3 +1,3 @@
# generated by datamodel-codegen:
# filename: json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/feed/createThread.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -9,7 +9,7 @@ from pydantic import BaseModel, Field
from ...type import basic
class CreateThreadRequest(BaseModel):
class CreateThreadEntityRequest(BaseModel):
message: str = Field(..., description='Message')
from_: basic.Uuid = Field(
...,

View File

@ -1,3 +1,3 @@
# generated by datamodel-codegen:
# filename: json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/lineage/addLineage.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,3 +1,3 @@
# generated by datamodel-codegen:
# filename: json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/services/createDashboardService.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/services/createDatabaseService.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/services/createMessagingService.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/services/createPipelineService.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/services/updateDashboardService.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/services/updateDatabaseService.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/services/updateMessagingService.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/services/updatePipelineService.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/setOwner.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,3 +1,3 @@
# generated by datamodel-codegen:
# filename: json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/tags/createTag.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -11,7 +11,7 @@ from pydantic import BaseModel, Field
from ...entity.tags import tagCategory
class CreateTagApiRequest(BaseModel):
class CreateTagEntityRequest(BaseModel):
name: tagCategory.TagName
description: str = Field(..., description='Unique name of the tag category')
associatedTags: Optional[List[str]] = Field(

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/tags/createTagCategory.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -9,7 +9,7 @@ from pydantic import BaseModel, Field
from ...entity.tags import tagCategory
class CreateTagCategoryRequest(BaseModel):
class CreateTagCategoryEntityRequest(BaseModel):
name: tagCategory.TagName
description: str = Field(..., description='Description of the tag category')
categoryType: tagCategory.TagCategoryType

View File

@ -1,3 +1,3 @@
# generated by datamodel-codegen:
# filename: json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/teams/createTeam.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -12,7 +12,7 @@ from ...entity.teams import team
from ...type import basic, profile
class TeamEntity(BaseModel):
class CreateTeamEntityRequest(BaseModel):
name: team.TeamName
displayName: Optional[str] = Field(
None,

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/api/teams/createUser.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -12,7 +12,7 @@ from ...entity.teams import user
from ...type import basic, profile
class RequestToCreateUserEntity(BaseModel):
class CreateUserEntityRequest(BaseModel):
name: user.UserName
displayName: Optional[str] = Field(
None, description="Name used for display purposes. Example 'FirstName LastName'"

View File

@ -1,3 +1,3 @@
# generated by datamodel-codegen:
# filename: json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/entity/bots.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -23,6 +23,14 @@ class Bot(BaseModel):
description="Name used for display purposes. Example 'FirstName LastName'.",
)
description: Optional[str] = Field(None, description='Description of the bot.')
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
href: Optional[basic.Href] = Field(
None, description='Link to the resource corresponding to this bot.'
)

View File

@ -1,3 +1,3 @@
# generated by datamodel-codegen:
# filename: json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/entity/data/chart.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -43,6 +43,14 @@ class Chart(BaseModel):
description: Optional[str] = Field(
None, description='Description of the dashboard, what it is, and how to use it.'
)
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
chartType: Optional[ChartType] = None
chartUrl: Optional[AnyUrl] = Field(
None, description='Chart URL, pointing to its own Service URL.'

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/entity/data/dashboard.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -29,6 +29,14 @@ class Dashboard(BaseModel):
description: Optional[str] = Field(
None, description='Description of the dashboard, what it is, and how to use it.'
)
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
dashboardUrl: Optional[AnyUrl] = Field(None, description='Dashboard URL.')
charts: Optional[List[entityReference.EntityReference]] = Field(
None, description='All the charts included in this Dashboard.'

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/entity/data/database.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -26,9 +26,20 @@ class Database(BaseModel):
None,
description="Name that uniquely identifies a database in the format 'ServiceName.DatabaseName'.",
)
displayName: Optional[str] = Field(
None, description='Display Name that identifies this database.'
)
description: Optional[str] = Field(
None, description='Description of the database instance.'
)
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
href: Optional[basic.Href] = Field(
None, description='Link to the resource corresponding to this entity.'
)

View File

@ -1,14 +1,14 @@
# generated by datamodel-codegen:
# filename: schema/entity/data/metrics.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
from typing import Optional
from typing import List, Optional
from pydantic import BaseModel, Field, constr
from ...type import basic, entityReference, usageDetails
from ...type import basic, entityReference, tagLabel, usageDetails
class Metrics(BaseModel):
@ -22,16 +22,30 @@ class Metrics(BaseModel):
None,
description="A unique name that identifies a metric in the format 'ServiceName.MetricName'.",
)
displayName: Optional[str] = Field(
None, description='Display Name that identifies this metric.'
)
description: Optional[str] = Field(
None,
description='Description of metrics instance, what it is, and how to use it.',
)
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
href: Optional[basic.Href] = Field(
None, description='Link to the resource corresponding to this entity.'
)
owner: Optional[entityReference.EntityReference] = Field(
None, description='Owner of this metrics.'
)
tags: Optional[List[tagLabel.TagLabel]] = Field(
None, description='Tags for this chart.'
)
service: entityReference.EntityReference = Field(
..., description='Link to service where this metrics is hosted in.'
)

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/entity/data/model.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -22,12 +22,20 @@ class Model(BaseModel):
displayName: Optional[str] = Field(
None, description='Display Name that identifies this model.'
)
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
description: Optional[str] = Field(
None, description='Description of the model, what it is, and how to use it.'
)
algorithm: str = Field(..., description='Algorithm used to train the model')
algorithm: str = Field(..., description='Algorithm used to train the model.')
dashboard: Optional[entityReference.EntityReference] = Field(
None, description='Performance Dashboard URL to track metric evolution'
None, description='Performance Dashboard URL to track metric evolution.'
)
href: Optional[basic.Href] = Field(
None, description='Link to the resource corresponding to this entity.'

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/entity/data/pipeline.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -29,14 +29,22 @@ class Pipeline(BaseModel):
description: Optional[str] = Field(
None, description='Description of this Pipeline.'
)
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
pipelineUrl: Optional[AnyUrl] = Field(
None,
description='Pipeline URL to visit/manage. This URL points to respective pipeline service UI',
description='Pipeline URL to visit/manage. This URL points to respective pipeline service UI.',
)
concurrency: Optional[int] = Field(None, description='Concurrency of the Pipeline')
pipelineLocation: Optional[str] = Field(None, description='Pipeline Code Location')
concurrency: Optional[int] = Field(None, description='Concurrency of the Pipeline.')
pipelineLocation: Optional[str] = Field(None, description='Pipeline Code Location.')
startDate: Optional[basic.DateTime] = Field(
None, description='Start date of the workflow'
None, description='Start date of the workflow.'
)
tasks: Optional[List[entityReference.EntityReference]] = Field(
None, description='All the tasks that are part of pipeline.'

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/entity/data/report.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -22,9 +22,21 @@ class Report(BaseModel):
None,
description="A unique name that identifies a report in the format 'ServiceName.ReportName'.",
)
displayName: Optional[str] = Field(
None,
description='Display Name that identifies this report. It could be title or label from the source services.',
)
description: Optional[str] = Field(
None, description='Description of this report instance.'
)
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
href: Optional[basic.Href] = Field(
None, description='Link to the resource corresponding to this report.'
)

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/entity/data/table.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -80,7 +80,7 @@ class TableConstraint(BaseModel):
class ColumnName(BaseModel):
__root__: constr(regex=r'^[^.]*$', min_length=1, max_length=64) = Field(
...,
description='Local name (not fully qualified name) of the column. ColumnName is `-` when the column is not named in struct dataType. For example, BigQuery supports struct with unnamed fields',
description='Local name (not fully qualified name) of the column. ColumnName is `-` when the column is not named in struct dataType. For example, BigQuery supports struct with unnamed fields.',
)
@ -219,26 +219,32 @@ class Table(BaseModel):
name: TableName = Field(
..., description='Name of a table. Expected to be unique within a database.'
)
description: Optional[str] = Field(None, description='Description of a table.')
href: Optional[basic.Href] = Field(None, description='Link to this table resource.')
tableType: Optional[TableType] = None
displayName: Optional[str] = Field(
None,
description='Display Name that identifies this table. It could be title or label from the source services.',
)
fullyQualifiedName: Optional[str] = Field(
None,
description='Fully qualified name of a table in the form `serviceName.databaseName.tableName`.',
)
description: Optional[str] = Field(None, description='Description of a table.')
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
href: Optional[basic.Href] = Field(None, description='Link to this table resource.')
tableType: Optional[TableType] = None
columns: List[Column] = Field(..., description='Columns in this table.')
tableConstraints: Optional[List[TableConstraint]] = Field(
None, description='Table constraints.'
)
usageSummary: Optional[usageDetails.TypeUsedToReturnUsageDetailsOfAnEntity] = Field(
None, description='Latest usage information for this table.'
)
owner: Optional[entityReference.EntityReference] = Field(
None, description='Owner of this table.'
)
followers: Optional[entityReference.EntityReferenceList] = Field(
None, description='Followers of this table.'
)
database: Optional[entityReference.EntityReference] = Field(
None, description='Reference to Database that contains this table.'
)
@ -248,6 +254,12 @@ class Table(BaseModel):
tags: Optional[List[tagLabel.TagLabel]] = Field(
None, description='Tags for this table.'
)
usageSummary: Optional[usageDetails.TypeUsedToReturnUsageDetailsOfAnEntity] = Field(
None, description='Latest usage information for this table.'
)
followers: Optional[entityReference.EntityReferenceList] = Field(
None, description='Followers of this table.'
)
joins: Optional[TableJoins] = Field(
None,
description='Details of other tables this table is frequently joined with.',

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/entity/data/task.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -27,23 +27,31 @@ class Task(BaseModel):
description="A unique name that identifies a pipeline in the format 'ServiceName.PipelineName.TaskName'.",
)
description: Optional[str] = Field(None, description='Description of this Task.')
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
taskUrl: Optional[AnyUrl] = Field(
None,
description='Task URL to visit/manage. This URL points to respective pipeline service UI',
description='Task URL to visit/manage. This URL points to respective pipeline service UI.',
)
downstreamTasks: Optional[List[constr(min_length=1, max_length=64)]] = Field(
None, description='All the tasks that are downstream of this task.'
)
taskType: Optional[str] = Field(
None, description='Type of the Task. Usually refers to the class it implements'
None, description='Type of the Task. Usually refers to the class it implements.'
)
taskSQL: Optional[str] = Field(
None, description='SQL used in the task. Can be used to determine the lineage'
None, description='SQL used in the task. Can be used to determine the lineage.'
)
startDate: Optional[basic.DateTime] = Field(
None, description='Start date of the task'
None, description='Start date of the task.'
)
endDate: Optional[basic.DateTime] = Field(None, description='End date of the task')
endDate: Optional[basic.DateTime] = Field(None, description='End date of the task.')
tags: Optional[List[tagLabel.TagLabel]] = Field(
None, description='Tags for this Pipeline.'
)

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/entity/data/topic.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -39,9 +39,21 @@ class Topic(BaseModel):
None,
description="Name that uniquely identifies a topic in the format 'messagingServiceName.topicName'.",
)
displayName: Optional[str] = Field(
None,
description='Display Name that identifies this topic. It could be title or label from the source services.',
)
description: Optional[str] = Field(
None, description='Description of the topic instance.'
)
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
service: entityReference.EntityReference = Field(
...,
description='Link to the messaging cluster/service where this topic is hosted in.',

View File

@ -1,3 +1,3 @@
# generated by datamodel-codegen:
# filename: json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/entity/feed/thread.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,3 +1,3 @@
# generated by datamodel-codegen:
# filename: json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/entity/services/dashboardService.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -26,12 +26,23 @@ class DashboardService(BaseModel):
name: constr(min_length=1, max_length=64) = Field(
..., description='Name that identifies this dashboard service.'
)
displayName: Optional[str] = Field(
None, description='Display Name that identifies this dashboard service.'
)
serviceType: DashboardServiceType = Field(
..., description='Type of dashboard service such as Looker or Superset...'
)
description: Optional[str] = Field(
None, description='Description of a dashboard service instance.'
)
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
dashboardUrl: AnyUrl = Field(
...,
description='Dashboard Service URL. This will be used to make REST API calls to Dashboard Service.',

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/entity/services/databaseService.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -34,6 +34,9 @@ class DatabaseService(BaseModel):
name: constr(min_length=1, max_length=64) = Field(
..., description='Name that identifies this database service.'
)
displayName: Optional[str] = Field(
None, description='Display Name that identifies this database service.'
)
serviceType: DatabaseServiceType = Field(
...,
description='Type of database service such as MySQL, BigQuery, Snowflake, Redshift, Postgres...',
@ -41,6 +44,14 @@ class DatabaseService(BaseModel):
description: Optional[str] = Field(
None, description='Description of a database service instance.'
)
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
href: basic.Href = Field(
..., description='Link to the resource corresponding to this database service.'
)

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/entity/services/messagingService.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -37,6 +37,18 @@ class MessagingService(BaseModel):
description: Optional[str] = Field(
None, description='Description of a messaging service instance.'
)
displayName: Optional[str] = Field(
None,
description='Display Name that identifies this messaging service. It could be title or label from the source services.',
)
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
brokers: Brokers = Field(
...,
description='Multiple bootstrap addresses for Kafka. Single proxy address for Pulsar.',

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/entity/services/pipelineService.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -30,7 +30,19 @@ class PipelineService(BaseModel):
description: Optional[str] = Field(
None, description='Description of a pipeline service instance.'
)
pipelineUrl: AnyUrl = Field(..., description='Pipeline Service Management/UI URL')
displayName: Optional[str] = Field(
None,
description='Display Name that identifies this pipeline service. It could be title or label from the source services.',
)
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
pipelineUrl: AnyUrl = Field(..., description='Pipeline Service Management/UI URL.')
ingestionSchedule: Optional[schedule.Schedule] = Field(
None, description='Schedule for running metadata ingestion jobs.'
)

View File

@ -1,3 +1,3 @@
# generated by datamodel-codegen:
# filename: json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/entity/tags/tagCategory.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -33,6 +33,14 @@ class Tag(BaseModel):
description='Unique name of the tag of format Category.PrimaryTag.SecondaryTag.',
)
description: str = Field(..., description='Unique name of the tag category.')
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
href: Optional[basic.Href] = Field(
None, description='Link to the resource corresponding to the tag.'
)
@ -55,7 +63,18 @@ class TagCategory(BaseModel):
extra = Extra.forbid
name: TagName
displayName: Optional[str] = Field(
None, description='Display Name that identifies this tag category.'
)
description: str = Field(..., description='Description of the tag category.')
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
categoryType: TagCategoryType
href: Optional[basic.Href] = Field(
None, description='Link to the resource corresponding to the tag category.'

View File

@ -1,3 +1,3 @@
# generated by datamodel-codegen:
# filename: json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/entity/teams/team.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -25,6 +25,14 @@ class Team(BaseModel):
None, description="Name used for display purposes. Example 'Data Science team'."
)
description: Optional[str] = Field(None, description='Description of the team.')
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
href: basic.Href = Field(
..., description='Link to the resource corresponding to this entity.'
)

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/entity/teams/user.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -30,6 +30,14 @@ class User(BaseModel):
None,
description="Name used for display purposes. Example 'FirstName LastName'.",
)
version: Optional[basic.EntityVersion] = Field(
None, description='Metadata version of the entity.'
)
updatedAt: Optional[basic.DateTime] = Field(
None,
description='Last update time corresponding to the new version of the entity.',
)
updatedBy: Optional[str] = Field(None, description='User who made the update.')
email: basic.Email = Field(..., description='Email address of the user.')
href: basic.Href = Field(
..., description='Link to the resource corresponding to this entity.'

View File

@ -1,3 +1,3 @@
# generated by datamodel-codegen:
# filename: json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/type/auditLog.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/type/basic.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -8,7 +8,7 @@ from datetime import date, datetime
from typing import Any, Optional
from uuid import UUID
from pydantic import AnyUrl, BaseModel, EmailStr, Field, constr
from pydantic import AnyUrl, BaseModel, EmailStr, Field, confloat, constr
class Basic(BaseModel):
@ -29,13 +29,6 @@ class Email(BaseModel):
)
class EntityLink(BaseModel):
__root__: constr(regex=r'^<#E/\S+/\S+>$') = Field(
...,
description='Link to an entity or field within an entity using this format `<#E/{enties}/{entityName}/{field}/{fieldValue}`.',
)
class Timestamp(BaseModel):
__root__: str = Field(..., description='Timestamp in unixTimeMillis.')
@ -68,6 +61,20 @@ class DateTime(BaseModel):
)
class EntityVersion(BaseModel):
__root__: confloat(ge=0.1, multiple_of=0.1) = Field(
...,
description='Metadata version of the entity in the form `Major.Minor`. First version always starts from `0.1` when the entity is created. When the backward compatible changes are made to the entity, only the `Minor` version is incremented - example `1.0` is changed to `1.1`. When backward incompatible changes are made the `Major` version is incremented - example `1.1` to `2.0`.',
)
class EntityLink(BaseModel):
__root__: constr(regex=r'^<#E/\S+/\S+>$') = Field(
...,
description='Link to an entity or field within an entity using this format `<#E/{enties}/{entityName}/{field}/{fieldValue}`.',
)
class SqlQuery(BaseModel):
__root__: str = Field(
..., description="SQL query statement. Example - 'select * from orders'."

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/type/collectionDescriptor.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/type/dailyCount.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/type/entityLineage.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -26,7 +26,7 @@ class EntityLineage(BaseModel):
extra = Extra.forbid
entity: entityReference.EntityReference = Field(
..., description='Primary entity for which this lineage graph is created'
..., description='Primary entity for which this lineage graph is created.'
)
nodes: Optional[List[entityReference.EntityReference]] = None
upstreamEdges: Optional[List[Edge]] = None

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/type/entityReference.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations
@ -29,6 +29,9 @@ class EntityReference(BaseModel):
description: Optional[str] = Field(
None, description='Optional description of entity.'
)
displayName: Optional[str] = Field(
None, description='Display Name that identifies this entity.'
)
href: Optional[basic.Href] = Field(None, description='Link to the entity resource.')

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/type/entityUsage.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/type/jdbcConnection.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/type/paging.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/type/profile.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/type/schedule.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/type/tagLabel.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: schema/type/usageDetails.json
# timestamp: 2021-10-12T00:34:28+00:00
# timestamp: 2021-10-21T16:10:22+00:00
from __future__ import annotations

View File

@ -0,0 +1,329 @@
import logging
from typing import Generic, List, Type, TypeVar, Union, get_args
from pydantic import BaseModel
from metadata.generated.schema.api.lineage.addLineage import AddLineage
from metadata.generated.schema.entity.data.chart import Chart
from metadata.generated.schema.entity.data.dashboard import Dashboard
from metadata.generated.schema.entity.data.database import Database
from metadata.generated.schema.entity.data.metrics import Metrics
from metadata.generated.schema.entity.data.model import Model
from metadata.generated.schema.entity.data.pipeline import Pipeline
from metadata.generated.schema.entity.data.report import Report
from metadata.generated.schema.entity.data.table import Table
from metadata.generated.schema.entity.data.task import Task
from metadata.generated.schema.entity.data.topic import Topic
from metadata.generated.schema.entity.services.dashboardService import DashboardService
from metadata.generated.schema.entity.services.databaseService import DatabaseService
from metadata.generated.schema.entity.services.messagingService import MessagingService
from metadata.generated.schema.entity.services.pipelineService import PipelineService
from metadata.generated.schema.entity.teams.user import User
from metadata.ingestion.ometa.auth_provider import AuthenticationProvider
from metadata.ingestion.ometa.client import REST, ClientConfig
from metadata.ingestion.ometa.openmetadata_rest import (
Auth0AuthenticationProvider,
GoogleAuthenticationProvider,
MetadataServerConfig,
NoOpAuthenticationProvider,
OktaAuthenticationProvider,
)
logger = logging.getLogger(__name__)
# The naming convention is T for Entity Types and C for Create Types
T = TypeVar("T", bound=BaseModel)
C = TypeVar("C", bound=BaseModel)
class MissingEntityTypeException(Exception):
"""
We are receiving an Entity Type[T] not covered
in our suffix generation list
"""
class EntityList(Generic[T], BaseModel):
entities: List[T]
total: int
after: str = None
class OMeta(Generic[T, C]):
"""
Generic interface to the OpenMetadata API
It is a polymorphism on all our different Entities
"""
client: REST
_auth_provider: AuthenticationProvider
class_root = ".".join(["metadata", "generated", "schema"])
entity_path = "entity"
api_path = "api"
data_path = "data"
services_path = "services"
teams_path = "teams"
def __init__(self, config: MetadataServerConfig, raw_data: bool = False):
self.config = config
if self.config.auth_provider_type == "google":
self._auth_provider: AuthenticationProvider = (
GoogleAuthenticationProvider.create(self.config)
)
elif self.config.auth_provider_type == "okta":
self._auth_provider: AuthenticationProvider = (
OktaAuthenticationProvider.create(self.config)
)
elif self.config.auth_provider_type == "auth0":
self._auth_provider: AuthenticationProvider = (
Auth0AuthenticationProvider.create(self.config)
)
else:
self._auth_provider: AuthenticationProvider = (
NoOpAuthenticationProvider.create(self.config)
)
client_config: ClientConfig = ClientConfig(
base_url=self.config.api_endpoint,
api_version=self.config.api_version,
auth_header="X-Catalog-Source",
auth_token=self._auth_provider.auth_token(),
)
self.client = REST(client_config)
self._use_raw_data = raw_data
def get_suffix(self, entity: Type[T]) -> str:
"""
Given an entity Type from the generated sources,
return the endpoint to run requests.
Might be interesting to follow a more strict
and type-checked approach
"""
# Entity Schemas
if issubclass(
entity, get_args(Union[Model, self.get_create_entity_type(Model)])
):
return "/models"
if issubclass(
entity, get_args(Union[Chart, self.get_create_entity_type(Chart)])
):
return "/charts"
if issubclass(
entity, get_args(Union[Dashboard, self.get_create_entity_type(Dashboard)])
):
return "/dashboards"
if issubclass(
entity, get_args(Union[Database, self.get_create_entity_type(Database)])
):
return "/databases"
if issubclass(
entity, get_args(Union[Pipeline, self.get_create_entity_type(Pipeline)])
):
return "/pipelines"
if issubclass(
entity, get_args(Union[Table, self.get_create_entity_type(Table)])
):
return "/tables"
if issubclass(entity, get_args(Union[Task, self.get_create_entity_type(Task)])):
return "/tasks"
if issubclass(
entity, get_args(Union[Topic, self.get_create_entity_type(Topic)])
):
return "/topics"
if issubclass(entity, Metrics):
return "/metrics"
if issubclass(entity, AddLineage):
return "/lineage"
if issubclass(entity, Report):
return "/reports"
if issubclass(entity, get_args(Union[User, self.get_create_entity_type(User)])):
return "/users"
# Services Schemas
if issubclass(
entity,
get_args(
Union[DatabaseService, self.get_create_entity_type(DatabaseService)]
),
):
return "/services/databaseServices"
if issubclass(
entity,
get_args(
Union[DashboardService, self.get_create_entity_type(DashboardService)]
),
):
return "/services/dashboardServices"
if issubclass(
entity,
get_args(
Union[MessagingService, self.get_create_entity_type(MessagingService)]
),
):
return "/services/messagingServices"
if issubclass(
entity,
get_args(
Union[PipelineService, self.get_create_entity_type(PipelineService)]
),
):
return "/services/pipelineServices"
raise MissingEntityTypeException(
f"Missing {entity} type when generating suffixes"
)
def get_module_path(self, entity: Type[T]) -> str:
"""
Based on the entity, return the module path
it is found inside generated
"""
if "service" in entity.__name__.lower():
return self.services_path
if "user" in entity.__name__.lower():
return self.teams_path
return self.data_path
def get_create_entity_type(self, entity: Type[T]) -> Type[C]:
"""
imports and returns the Create Type from an Entity Type T.
We are following the expected path structure to import
on-the-fly the necessary class and pass it to the consumer
"""
file_name = f"create{entity.__name__}"
class_path = ".".join(
[self.class_root, self.api_path, self.get_module_path(entity), file_name]
)
class_name = f"Create{entity.__name__}EntityRequest"
create_class = getattr(
__import__(class_path, globals(), locals(), [class_name]), class_name
)
return create_class
def get_entity_from_create(self, create: Type[C]) -> Type[T]:
"""
Inversely, import the Entity type based on the create Entity class
"""
class_name = create.__name__.replace("Create", "").replace("EntityRequest", "")
file_name = class_name.lower()
class_path = ".".join(
[
self.class_root,
self.entity_path,
self.get_module_path(create),
file_name.replace("service", "Service")
if "service" in create.__name__.lower()
else file_name,
]
)
entity_class = getattr(
__import__(class_path, globals(), locals(), [class_name]), class_name
)
return entity_class
def create_or_update(self, entity: Type[T], data: T) -> Type[C]:
"""
We allow both Entity and CreateEntity for PUT
If Entity, no need to find response class mapping.
We PUT to the endpoint and return the Entity generated result
Here the typing is a bit more weird. We will get a type T, be it
Entity or CreateEntity, and we are always going to return Entity
"""
is_create = "create" in entity.__name__.lower()
is_service = "service" in entity.__name__.lower()
# Prepare the return Entity Type
if is_create:
entity_class = self.get_entity_from_create(entity)
else:
entity_class = entity
# Prepare the request method
if is_service and is_create:
# Services can only be created via POST
method = self.client.post
else:
method = self.client.put
resp = method(self.get_suffix(entity), data=data.json())
return entity_class(**resp)
def get_by_name(self, entity: Type[T], name: str) -> Type[T]:
resp = self.client.get(f"{self.get_suffix(entity)}/name/{name}")
return entity(**resp)
def get_by_id(self, entity: Type[T], entity_id: str) -> Type[T]:
resp = self.client.get(f"{self.get_suffix(entity)}/{entity_id}")
return entity(**resp)
def list_entities(
self, entity: Type[T], fields: str = None, after: str = None, limit: int = 1000
) -> EntityList[T]:
"""
Helps us paginate over the collection
"""
suffix = self.get_suffix(entity)
if fields is None:
resp = self.client.get(suffix)
else:
if after is not None:
resp = self.client.get(
f"{suffix}?fields={fields}&after={after}&limit={limit}"
)
else:
resp = self.client.get(f"{suffix}?fields={fields}&limit={limit}")
if self._use_raw_data:
return resp
else:
entities = [entity(**t) for t in resp["data"]]
total = resp["paging"]["total"]
after = resp["paging"]["after"] if "after" in resp["paging"] else None
return EntityList(entities=entities, total=total, after=after)
def list_services(self, entity: Type[T]) -> List[EntityList[T]]:
"""
Service listing does not implement paging
"""
resp = self.client.get(self.get_suffix(entity))
if self._use_raw_data:
return resp
else:
return [entity(**p) for p in resp["data"]]
def delete(self, entity: Type[T], entity_id: str) -> None:
self.client.delete(f"{self.get_suffix(entity)}/{entity_id}")

View File

@ -40,7 +40,7 @@ from metadata.generated.schema.api.data.createPipeline import (
)
from metadata.generated.schema.api.data.createTable import CreateTableEntityRequest
from metadata.generated.schema.api.data.createTask import CreateTaskEntityRequest
from metadata.generated.schema.api.data.createTopic import CreateTopic
from metadata.generated.schema.api.data.createTopic import CreateTopicEntityRequest
from metadata.generated.schema.api.lineage.addLineage import AddLineage
from metadata.generated.schema.api.services.createDashboardService import (
CreateDashboardServiceEntityRequest,
@ -419,7 +419,9 @@ class OpenMetadataAPIClient(object):
)
return MessagingService(**resp)
def create_or_update_topic(self, create_topic_request: CreateTopic) -> Topic:
def create_or_update_topic(
self, create_topic_request: CreateTopicEntityRequest
) -> Topic:
"""Create or Update a Table"""
resp = self.client.put("/topics", data=create_topic_request.json())
return Topic(**resp)

View File

@ -31,7 +31,7 @@ from metadata.generated.schema.api.data.createPipeline import (
)
from metadata.generated.schema.api.data.createTable import CreateTableEntityRequest
from metadata.generated.schema.api.data.createTask import CreateTaskEntityRequest
from metadata.generated.schema.api.data.createTopic import CreateTopic
from metadata.generated.schema.api.data.createTopic import CreateTopicEntityRequest
from metadata.generated.schema.api.lineage.addLineage import AddLineage
from metadata.generated.schema.entity.data.chart import ChartType
from metadata.generated.schema.entity.data.model import Model
@ -99,7 +99,7 @@ class MetadataRestSink(Sink):
def write_record(self, record: Record) -> None:
if isinstance(record, OMetaDatabaseAndTable):
self.write_tables(record)
elif isinstance(record, CreateTopic):
elif isinstance(record, CreateTopicEntityRequest):
self.write_topics(record)
elif isinstance(record, Chart):
self.write_charts(record)
@ -173,7 +173,7 @@ class MetadataRestSink(Sink):
logger.error(err)
self.status.failure(f"Table: {table_and_db.table.name.__root__}")
def write_topics(self, topic: CreateTopic) -> None:
def write_topics(self, topic: CreateTopicEntityRequest) -> None:
try:
created_topic = self.client.create_or_update_topic(topic)
logger.info(f"Successfully ingested topic {created_topic.name.__root__}")

View File

@ -24,7 +24,7 @@ from confluent_kafka.schema_registry.schema_registry_client import (
)
from metadata.config.common import ConfigModel
from metadata.generated.schema.api.data.createTopic import CreateTopic
from metadata.generated.schema.api.data.createTopic import CreateTopicEntityRequest
from metadata.generated.schema.entity.data.topic import SchemaType, Topic
from metadata.generated.schema.entity.services.messagingService import (
MessagingServiceType,
@ -104,13 +104,13 @@ class KafkaSource(Source):
def prepare(self):
pass
def next_record(self) -> Iterable[CreateTopic]:
def next_record(self) -> Iterable[CreateTopicEntityRequest]:
topics = self.admin_client.list_topics().topics
for t in topics:
if self.config.filter_pattern.included(t):
logger.info("Fetching topic schema {}".format(t))
topic_schema = self._parse_topic_metadata(t)
topic = CreateTopic(
topic = CreateTopicEntityRequest(
name=t,
service=EntityReference(
id=self.service.id, type="messagingService"

View File

@ -27,7 +27,7 @@ from faker import Faker
from pydantic import ValidationError
from metadata.config.common import ConfigModel
from metadata.generated.schema.api.data.createTopic import CreateTopic
from metadata.generated.schema.api.data.createTopic import CreateTopicEntityRequest
from metadata.generated.schema.api.lineage.addLineage import AddLineage
from metadata.generated.schema.api.services.createDashboardService import (
CreateDashboardServiceEntityRequest,
@ -435,12 +435,12 @@ class SampleDataSource(Source):
self.status.scanned("table", table_metadata.name.__root__)
yield table_and_db
def ingest_topics(self) -> Iterable[CreateTopic]:
def ingest_topics(self) -> Iterable[CreateTopicEntityRequest]:
for topic in self.topics["topics"]:
topic["service"] = EntityReference(
id=self.kafka_service.id, type="messagingService"
)
create_topic = CreateTopic(**topic)
create_topic = CreateTopicEntityRequest(**topic)
self.status.scanned("topic", create_topic.name.__root__)
yield create_topic

View File

@ -7,7 +7,7 @@ from typing import Iterable, List
from faker import Faker
from metadata.generated.schema.api.data.createTopic import CreateTopic
from metadata.generated.schema.api.data.createTopic import CreateTopicEntityRequest
from metadata.generated.schema.api.services.createDashboardService import (
CreateDashboardServiceEntityRequest,
)
@ -281,7 +281,7 @@ class SampleEntitySource(Source):
)
yield dashboard
def ingest_topics(self) -> Iterable[CreateTopic]:
def ingest_topics(self) -> Iterable[CreateTopicEntityRequest]:
for h in range(self.config.no_of_services):
create_service = None
while True:
@ -304,7 +304,7 @@ class SampleEntitySource(Source):
"Ingesting service {}/{}".format(h + 1, self.config.no_of_services)
)
for j in range(self.config.no_of_topics):
topic_entity = CreateTopic(
topic_entity = CreateTopicEntityRequest(
name=self.table_name(),
description=self.description(),
partitions=self.chart_ids(),

View File

@ -0,0 +1,177 @@
"""
OpenMetadata high-level API Database test
"""
import uuid
from unittest import TestCase
from metadata.generated.schema.api.data.createDatabase import (
CreateDatabaseEntityRequest,
)
from metadata.generated.schema.api.services.createDatabaseService import (
CreateDatabaseServiceEntityRequest,
)
from metadata.generated.schema.api.teams.createUser import CreateUserEntityRequest
from metadata.generated.schema.entity.data.database import Database
from metadata.generated.schema.entity.services.databaseService import (
DatabaseService,
DatabaseServiceType,
)
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.generated.schema.type.jdbcConnection import JdbcInfo
from metadata.ingestion.ometa.ometa_api import OMeta
from metadata.ingestion.ometa.openmetadata_rest import MetadataServerConfig
class OMetaDatabaseTest(TestCase):
"""
Run this integration test with the local API available
Install the ingestion package before running the tests
"""
service_entity_id = None
server_config = MetadataServerConfig(api_endpoint="http://localhost:8585/api")
metadata = OMeta(server_config)
user = metadata.create_or_update(
entity=CreateUserEntityRequest,
data=CreateUserEntityRequest(name="random-user", email="random@user.com"),
)
owner = EntityReference(id=user.id, type="user")
service = CreateDatabaseServiceEntityRequest(
name="test-service",
serviceType=DatabaseServiceType.MySQL,
jdbc=JdbcInfo(driverClass="jdbc", connectionUrl="jdbc://localhost"),
)
def setUp(self) -> None:
"""
Prepare ingredients
"""
self.service_entity = self.metadata.create_or_update(
entity=CreateDatabaseServiceEntityRequest, data=self.service
)
self.entity = Database(
id=uuid.uuid4(),
name="test-db",
service=EntityReference(id=self.service_entity.id, type="databaseService"),
fullyQualifiedName="test-service.test-db",
)
self.create = CreateDatabaseEntityRequest(
name="test-db",
service=EntityReference(id=self.service_entity.id, type="databaseService"),
)
self.service_entity_id = str(self.service_entity.id.__root__)
def tearDown(self) -> None:
"""
Clean up
"""
self.metadata.delete(entity=DatabaseService, entity_id=self.service_entity_id)
def test_create(self):
"""
We can create a Database and we receive it back as Entity
"""
res = self.metadata.create_or_update(
entity=CreateDatabaseEntityRequest, data=self.create
)
self.assertEqual(res.name, self.create.name)
self.assertEqual(res.service.id, self.create.service.id)
self.assertEqual(res.owner, None)
def test_update(self):
"""
Updating it properly changes its properties
"""
res_create = self.metadata.create_or_update(
entity=CreateDatabaseEntityRequest, data=self.create
)
updated = self.entity.dict(exclude_unset=True)
updated["owner"] = self.owner
updated_entity = Database(**updated)
res = self.metadata.create_or_update(entity=Database, data=updated_entity)
# Same ID, updated algorithm
self.assertEqual(res.service.id, updated_entity.service.id)
self.assertEqual(res_create.id, res.id)
self.assertEqual(res.owner.id, self.user.id)
def test_get_name(self):
"""
We can fetch a Database by name and get it back as Entity
"""
self.metadata.create_or_update(entity=Database, data=self.entity)
res = self.metadata.get_by_name(
entity=Database, fqdn=self.entity.fullyQualifiedName
)
self.assertEqual(res.name, self.entity.name)
def test_get_id(self):
"""
We can fetch a Database by ID and get it back as Entity
"""
self.metadata.create_or_update(entity=Database, data=self.entity)
# First pick up by name
res_name = self.metadata.get_by_name(
entity=Database, fqdn=self.entity.fullyQualifiedName
)
# Then fetch by ID
res = self.metadata.get_by_id(
entity=Database, entity_id=str(res_name.id.__root__)
)
self.assertEqual(res_name.id, res.id)
def test_list(self):
"""
We can list all our Database
"""
self.metadata.create_or_update(entity=Database, data=self.entity)
res = self.metadata.list_entities(entity=Database)
# Fetch our test Database. We have already inserted it, so we should find it
data = next(
iter(ent for ent in res.entities if ent.name == self.entity.name), None
)
assert data
def test_delete(self):
"""
We can delete a Database by ID
"""
self.metadata.create_or_update(entity=Database, data=self.entity)
# Find by name
res_name = self.metadata.get_by_name(
entity=Database, fqdn=self.entity.fullyQualifiedName
)
# Then fetch by ID
res_id = self.metadata.get_by_id(
entity=Database, entity_id=str(res_name.id.__root__)
)
# Delete
self.metadata.delete(entity=Database, entity_id=str(res_id.id.__root__))
# Then we should not find it
res = self.metadata.list_entities(entity=Database)
print(res)
assert not next(
iter(ent for ent in res.entities if ent.name == self.entity.name), None
)

View File

@ -0,0 +1,138 @@
"""
OpenMetadata high-level API Model test
"""
import uuid
from unittest import TestCase
from metadata.generated.schema.api.data.createModel import CreateModelEntityRequest
from metadata.generated.schema.api.teams.createUser import CreateUserEntityRequest
from metadata.generated.schema.entity.data.model import Model
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.ingestion.ometa.ometa_api import OMeta
from metadata.ingestion.ometa.openmetadata_rest import MetadataServerConfig
class OMetaModelTest(TestCase):
"""
Run this integration test with the local API available
Install the ingestion package before running the tests
"""
server_config = MetadataServerConfig(api_endpoint="http://localhost:8585/api")
metadata = OMeta(server_config)
user = metadata.create_or_update(
entity=CreateUserEntityRequest,
data=CreateUserEntityRequest(name="random-user", email="random@user.com"),
)
owner = EntityReference(id=user.id, type="user")
entity = Model(
id=uuid.uuid4(),
name="test-model",
algorithm="algo",
fullyQualifiedName="test-model",
)
create = CreateModelEntityRequest(name="test-model", algorithm="algo")
def test_create(self):
"""
We can create a Model and we receive it back as Entity
"""
res = self.metadata.create_or_update(
entity=CreateModelEntityRequest, data=self.create
)
self.assertEqual(res.name, self.create.name)
self.assertEqual(res.algorithm, self.create.algorithm)
self.assertEqual(res.owner, None)
def test_update(self):
"""
Updating it properly changes its properties
"""
res_create = self.metadata.create_or_update(
entity=CreateModelEntityRequest, data=self.create
)
updated = self.entity.dict(exclude_unset=True)
updated["owner"] = self.owner
updated_entity = Model(**updated)
res = self.metadata.create_or_update(entity=Model, data=updated_entity)
# Same ID, updated algorithm
self.assertEqual(res.algorithm, updated_entity.algorithm)
self.assertEqual(res_create.id, res.id)
self.assertEqual(res.owner.id, self.user.id)
def test_get_name(self):
"""
We can fetch a model by name and get it back as Entity
"""
self.metadata.create_or_update(entity=Model, data=self.entity)
res = self.metadata.get_by_name(
entity=Model, fqdn=self.entity.fullyQualifiedName
)
self.assertEqual(res.name, self.entity.name)
def test_get_id(self):
"""
We can fetch a model by ID and get it back as Entity
"""
self.metadata.create_or_update(entity=Model, data=self.entity)
# First pick up by name
res_name = self.metadata.get_by_name(
entity=Model, fqdn=self.entity.fullyQualifiedName
)
# Then fetch by ID
res = self.metadata.get_by_id(entity=Model, entity_id=str(res_name.id.__root__))
self.assertEqual(res_name.id, res.id)
def test_list(self):
"""
We can list all our models
"""
self.metadata.create_or_update(entity=Model, data=self.entity)
res = self.metadata.list_entities(entity=Model)
# Fetch our test model. We have already inserted it, so we should find it
data = next(
iter(ent for ent in res.entities if ent.name == self.entity.name), None
)
assert data
def test_delete(self):
"""
We can delete a model by ID
"""
self.metadata.create_or_update(entity=Model, data=self.entity)
# Find by name
res_name = self.metadata.get_by_name(
entity=Model, fqdn=self.entity.fullyQualifiedName
)
# Then fetch by ID
res_id = self.metadata.get_by_id(
entity=Model, entity_id=str(res_name.id.__root__)
)
# Delete
self.metadata.delete(entity=Model, entity_id=str(res_id.id.__root__))
# Then we should not find it
res = self.metadata.list_entities(entity=Model)
print(res)
assert not next(
iter(ent for ent in res.entities if ent.name == self.entity.name), None
)

View File

@ -0,0 +1,120 @@
"""
OpenMetadata high-level API endpoint test
"""
from unittest import TestCase
from metadata.generated.schema.api.data.createModel import CreateModelEntityRequest
from metadata.generated.schema.api.data.createTopic import CreateTopicEntityRequest
from metadata.generated.schema.api.services.createDatabaseService import (
CreateDatabaseServiceEntityRequest,
)
from metadata.generated.schema.api.teams.createUser import CreateUserEntityRequest
from metadata.generated.schema.entity.data.chart import Chart
from metadata.generated.schema.entity.data.dashboard import Dashboard
from metadata.generated.schema.entity.data.database import Database
from metadata.generated.schema.entity.data.metrics import Metrics
from metadata.generated.schema.entity.data.model import Model
from metadata.generated.schema.entity.data.pipeline import Pipeline
from metadata.generated.schema.entity.data.report import Report
from metadata.generated.schema.entity.data.table import Table
from metadata.generated.schema.entity.data.task import Task
from metadata.generated.schema.entity.data.topic import Topic
from metadata.generated.schema.entity.services.dashboardService import DashboardService
from metadata.generated.schema.entity.services.databaseService import DatabaseService
from metadata.generated.schema.entity.services.messagingService import MessagingService
from metadata.generated.schema.entity.services.pipelineService import PipelineService
from metadata.generated.schema.entity.teams.user import User
from metadata.ingestion.ometa.ometa_api import OMeta
from metadata.ingestion.ometa.openmetadata_rest import MetadataServerConfig
class OMetaEndpointTest(TestCase):
"""
Make sure that we can infer the proper endpoints
from the generated entity classes
"""
server_config = MetadataServerConfig(api_endpoint="http://localhost:8585/api")
metadata = OMeta(server_config)
def test_entities_suffix(self):
"""
Pass Entities and test their suffix generation
"""
# ML
self.assertEqual(self.metadata.get_suffix(Model), "/models")
# Db
self.assertEqual(self.metadata.get_suffix(Database), "/databases")
self.assertEqual(self.metadata.get_suffix(Table), "/tables")
# Dashboards
self.assertEqual(self.metadata.get_suffix(Dashboard), "/dashboards")
self.assertEqual(self.metadata.get_suffix(Chart), "/charts")
self.assertEqual(self.metadata.get_suffix(Metrics), "/metrics")
self.assertEqual(self.metadata.get_suffix(Report), "/reports")
# Pipelines
self.assertEqual(self.metadata.get_suffix(Pipeline), "/pipelines")
self.assertEqual(self.metadata.get_suffix(Task), "/tasks")
# Topic
self.assertEqual(self.metadata.get_suffix(Topic), "/topics")
def test_services_suffix(self):
"""
Pass Services and test their suffix generation
"""
self.assertEqual(
self.metadata.get_suffix(DashboardService), "/services/dashboardServices"
)
self.assertEqual(
self.metadata.get_suffix(DatabaseService), "/services/databaseServices"
)
self.assertEqual(
self.metadata.get_suffix(MessagingService), "/services/messagingServices"
)
self.assertEqual(
self.metadata.get_suffix(PipelineService), "/services/pipelineServices"
)
def test_teams_suffix(self):
"""
Pass Teams and test their suffix generation
"""
self.assertEqual(self.metadata.get_suffix(User), "/users")
def test_get_create_entity_type(self):
"""
Validate the mapping from Entity to CreateEntity
"""
create = self.metadata.get_create_entity_type(Model)
assert issubclass(create, CreateModelEntityRequest)
create = self.metadata.get_create_entity_type(Topic)
assert issubclass(create, CreateTopicEntityRequest)
create = self.metadata.get_create_entity_type(DatabaseService)
assert issubclass(create, CreateDatabaseServiceEntityRequest)
create = self.metadata.get_create_entity_type(User)
assert issubclass(create, CreateUserEntityRequest)
def test_get_entity_from_create(self):
"""
Validate the mapping from CreateEntity to Entity
"""
entity = self.metadata.get_entity_from_create(CreateModelEntityRequest)
assert issubclass(entity, Model)
entity = self.metadata.get_entity_from_create(CreateTopicEntityRequest)
assert issubclass(entity, Topic)
entity = self.metadata.get_entity_from_create(
CreateDatabaseServiceEntityRequest
)
assert issubclass(entity, DatabaseService)
entity = self.metadata.get_entity_from_create(CreateUserEntityRequest)
assert issubclass(entity, User)