mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-06-27 04:22:05 +00:00
Setup.py Refactored, ES port fix (#521)
* Pylint build failure fixed * Setup & dependency modified, Data profiler default to False, ES port fix * Profiler requirements refactored * Setup.py requirement fix * openmetadata-ingestion version upgrade
This commit is contained in:
parent
3937971959
commit
7652baa00d
@ -1,103 +0,0 @@
|
|||||||
---
|
|
||||||
description: This guide will help you install the Redshift connector and run it manually
|
|
||||||
---
|
|
||||||
|
|
||||||
# Redshift
|
|
||||||
|
|
||||||
{% hint style="info" %}
|
|
||||||
**Prerequisites**
|
|
||||||
|
|
||||||
OpenMetadata is built using Java, DropWizard, Jetty, and MySQL.
|
|
||||||
|
|
||||||
1. Python 3.7 or above
|
|
||||||
{% endhint %}
|
|
||||||
|
|
||||||
### Install from PyPI or Source
|
|
||||||
|
|
||||||
{% tabs %}
|
|
||||||
{% tab title="Install Using PyPI" %}
|
|
||||||
```bash
|
|
||||||
pip install 'openmetadata-ingestion[redshift]'
|
|
||||||
python -m spacy download en_core_web_sm
|
|
||||||
```
|
|
||||||
{% endtab %}
|
|
||||||
{% endtabs %}
|
|
||||||
|
|
||||||
## Run Manually
|
|
||||||
|
|
||||||
```bash
|
|
||||||
metadata ingest -c ./examples/workflows/redshift.json
|
|
||||||
```
|
|
||||||
|
|
||||||
### Configuration
|
|
||||||
|
|
||||||
{% code title="redshift.json" %}
|
|
||||||
```javascript
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"type": "redshift",
|
|
||||||
"config": {
|
|
||||||
"host_port": "redshift-cluster-1.clot5cqn1cnb.us-west-2.redshift.amazonaws.com:5439",
|
|
||||||
"username": "awsuser",
|
|
||||||
"password": "focguC-kaqqe5-nepsok",
|
|
||||||
"database": "warehouse",
|
|
||||||
"service_name": "aws_redshift",
|
|
||||||
"filter_pattern": {
|
|
||||||
"excludes": ["information_schema.*", "[\\w]*event_vw.*"]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
...
|
|
||||||
```
|
|
||||||
{% endcode %}
|
|
||||||
|
|
||||||
1. **username** - The Redshift username. We recommend creating a user with read-only permissions to all the databases in your Redshift installation.
|
|
||||||
2. **password** - password for the username
|
|
||||||
3. **service\_name** - Service name for this Redshift cluster. If you added the Redshift cluster through the OpenMetadata UI, make sure this service name matches the one you entered there.
|
|
||||||
4. **filter\_pattern** - Contains `includes` and `excludes` options that select which datasets, by name pattern, are ingested into OpenMetadata.
|
|
||||||
|
|
||||||
## Publish to OpenMetadata
|
|
||||||
|
|
||||||
Below is the configuration to publish Redshift data into the OpenMetadata service.
|
|
||||||
|
|
||||||
Optionally add the `pii` processor and the `metadata-rest-tables` sink, along with the `metadata-server` config.
|
|
||||||
|
|
||||||
{% code title="redshift.json" %}
|
|
||||||
```javascript
|
|
||||||
{
|
|
||||||
"source": {
|
|
||||||
"type": "redshift",
|
|
||||||
"config": {
|
|
||||||
"host_port": "redshift-cluster-1.clot5cqn1cnb.us-west-2.redshift.amazonaws.com:5439",
|
|
||||||
"username": "awsuser",
|
|
||||||
"password": "focguC-kaqqe5-nepsok",
|
|
||||||
"database": "warehouse",
|
|
||||||
"service_name": "aws_redshift",
|
|
||||||
"filter_pattern": {
|
|
||||||
"excludes": ["information_schema.*", "[\\w]*event_vw.*"]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"sink": {
|
|
||||||
"type": "metadata-rest",
|
|
||||||
"config": {}
|
|
||||||
},
|
|
||||||
"metadata_server": {
|
|
||||||
"type": "metadata-server",
|
|
||||||
"config": {
|
|
||||||
"api_endpoint": "http://localhost:8585/api",
|
|
||||||
"auth_provider_type": "no-auth"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"cron": {
|
|
||||||
"minute": "*/5",
|
|
||||||
"hour": null,
|
|
||||||
"day": null,
|
|
||||||
"month": null,
|
|
||||||
"day_of_week": null
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
```
|
|
||||||
{% endcode %}
|
|
||||||
|
|
@ -1,33 +0,0 @@
|
|||||||
{
|
|
||||||
"source": {
|
|
||||||
"type": "redshift",
|
|
||||||
"config": {
|
|
||||||
"host_port": "redshift-cluster-1.clot5cqn1cnb.us-west-2.redshift.amazonaws.com:5439",
|
|
||||||
"username": "awsuser",
|
|
||||||
"password": "focguC-kaqqe5-nepsok",
|
|
||||||
"database": "warehouse",
|
|
||||||
"service_name": "aws_redshift",
|
|
||||||
"filter_pattern": {
|
|
||||||
"excludes": ["information_schema.*", "[\\w]*event_vw.*"]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"sink": {
|
|
||||||
"type": "metadata-rest",
|
|
||||||
"config": {}
|
|
||||||
},
|
|
||||||
"metadata_server": {
|
|
||||||
"type": "metadata-server",
|
|
||||||
"config": {
|
|
||||||
"api_endpoint": "http://localhost:8585/api",
|
|
||||||
"auth_provider_type": "no-auth"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"cron": {
|
|
||||||
"minute": "*/5",
|
|
||||||
"hour": null,
|
|
||||||
"day": null,
|
|
||||||
"month": null,
|
|
||||||
"day_of_week": null
|
|
||||||
}
|
|
||||||
}
|
|
@ -14,7 +14,7 @@
|
|||||||
"index_topics": "true",
|
"index_topics": "true",
|
||||||
"index_dashboards": "true",
|
"index_dashboards": "true",
|
||||||
"es_host": "localhost",
|
"es_host": "localhost",
|
||||||
"es_port": 9300
|
"es_port": 9200
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"metadata_server": {
|
"metadata_server": {
|
||||||
|
@ -41,9 +41,6 @@ scheduler_requirements = {
|
|||||||
"simplescheduler@git+git://github.com/open-metadata/simplescheduler.git#egg=simplescheduler"
|
"simplescheduler@git+git://github.com/open-metadata/simplescheduler.git#egg=simplescheduler"
|
||||||
}
|
}
|
||||||
|
|
||||||
profiler_requirements = {
|
|
||||||
"openmetadata-data-profiler@git+git://github.com/open-metadata/data-profiler.git#egg=openmetadata-data-profiler"
|
|
||||||
}
|
|
||||||
|
|
||||||
base_requirements = {
|
base_requirements = {
|
||||||
"commonregex",
|
"commonregex",
|
||||||
@ -65,10 +62,14 @@ base_requirements = {
|
|||||||
"okta>=1.7.0",
|
"okta>=1.7.0",
|
||||||
"sqlalchemy>=1.3.24",
|
"sqlalchemy>=1.3.24",
|
||||||
"sql-metadata~=2.0.0",
|
"sql-metadata~=2.0.0",
|
||||||
"spacy==3.0.5",
|
"requests~=2.25.1"
|
||||||
"requests~=2.25.1",
|
|
||||||
"en_core_web_sm@https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz#egg=en_core_web"
|
|
||||||
}
|
}
|
||||||
|
pii_requirements = {
|
||||||
|
"en_core_web_sm@https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz#egg=en_core_web",
|
||||||
|
"pandas~=1.3.1",
|
||||||
|
"spacy==3.0.5"
|
||||||
|
}
|
||||||
|
|
||||||
base_plugins = {
|
base_plugins = {
|
||||||
"query-parser",
|
"query-parser",
|
||||||
"metadata-usage",
|
"metadata-usage",
|
||||||
@ -88,16 +89,16 @@ plugins: Dict[str, Set[str]] = {
|
|||||||
"mssql-odbc": {"pyodbc"},
|
"mssql-odbc": {"pyodbc"},
|
||||||
"mysql": {"pymysql>=1.0.2"},
|
"mysql": {"pymysql>=1.0.2"},
|
||||||
"oracle": {"cx_Oracle"},
|
"oracle": {"cx_Oracle"},
|
||||||
"pii-processor": {"pandas~=1.3.1"},
|
"pii-processor": pii_requirements,
|
||||||
"presto": {"pyhive~=0.6.3"},
|
"presto": {"pyhive~=0.6.3"},
|
||||||
"postgres": {"pymysql>=1.0.2", "psycopg2-binary", "GeoAlchemy2"},
|
"postgres": {"pymysql>=1.0.2", "psycopg2-binary", "GeoAlchemy2"},
|
||||||
"redshift": {"sqlalchemy-redshift", "GeoAlchemy2", "psycopg2-binary"},
|
"redshift": {"sqlalchemy-redshift", "GeoAlchemy2", "psycopg2-binary"},
|
||||||
"redshift-usage": {"sqlalchemy-redshift", "psycopg2-binary", "GeoAlchemy2"},
|
"redshift-usage": {"sqlalchemy-redshift", "psycopg2-binary", "GeoAlchemy2"},
|
||||||
"scheduler": scheduler_requirements,
|
"scheduler": scheduler_requirements,
|
||||||
"data-profiler": profiler_requirements,
|
"data-profiler": {"openmetadata-data-profiler"},
|
||||||
"snowflake": {"snowflake-sqlalchemy<=1.2.4"},
|
"snowflake": {"snowflake-sqlalchemy<=1.2.4"},
|
||||||
"snowflake-usage": {"snowflake-sqlalchemy<=1.2.4"},
|
"snowflake-usage": {"snowflake-sqlalchemy<=1.2.4"},
|
||||||
"sample-data": {"faker~=8.1.1"},
|
"sample-data": {"faker~=8.1.1","pandas~=1.3.1"},
|
||||||
"superset": {},
|
"superset": {},
|
||||||
"tableau": {"tableau-api-lib==0.1.22"},
|
"tableau": {"tableau-api-lib==0.1.22"},
|
||||||
"vertica": {"sqlalchemy-vertica[vertica-python]>=0.0.5"}
|
"vertica": {"sqlalchemy-vertica[vertica-python]>=0.0.5"}
|
||||||
@ -106,7 +107,7 @@ plugins: Dict[str, Set[str]] = {
|
|||||||
build_options = {"includes": ["_cffi_backend"]}
|
build_options = {"includes": ["_cffi_backend"]}
|
||||||
setup(
|
setup(
|
||||||
name="openmetadata-ingestion",
|
name="openmetadata-ingestion",
|
||||||
version="0.2.2",
|
version="0.3.0",
|
||||||
url="https://open-metadata.org/",
|
url="https://open-metadata.org/",
|
||||||
author="OpenMetadata Committers",
|
author="OpenMetadata Committers",
|
||||||
license="Apache License 2.0",
|
license="Apache License 2.0",
|
||||||
|
@ -21,7 +21,6 @@ from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type
|
|||||||
from urllib.parse import quote_plus
|
from urllib.parse import quote_plus
|
||||||
|
|
||||||
from pydantic import ValidationError
|
from pydantic import ValidationError
|
||||||
from metadata.config.common import ConfigurationError
|
|
||||||
from metadata.generated.schema.entity.services.databaseService import DatabaseServiceType
|
from metadata.generated.schema.entity.services.databaseService import DatabaseServiceType
|
||||||
from metadata.ingestion.models.ometa_table_db import OMetaDatabaseAndTable
|
from metadata.ingestion.models.ometa_table_db import OMetaDatabaseAndTable
|
||||||
|
|
||||||
@ -78,7 +77,7 @@ class SQLConnectionConfig(ConfigModel):
|
|||||||
include_views: Optional[bool] = True
|
include_views: Optional[bool] = True
|
||||||
include_tables: Optional[bool] = True
|
include_tables: Optional[bool] = True
|
||||||
generate_sample_data: Optional[bool] = True
|
generate_sample_data: Optional[bool] = True
|
||||||
data_profiler_enabled: Optional[bool] = True
|
data_profiler_enabled: Optional[bool] = False
|
||||||
data_profiler_offset: Optional[int] = 0
|
data_profiler_offset: Optional[int] = 0
|
||||||
data_profiler_limit: Optional[int] = 50000
|
data_profiler_limit: Optional[int] = 50000
|
||||||
filter_pattern: IncludeFilterPattern = IncludeFilterPattern.allow_all()
|
filter_pattern: IncludeFilterPattern = IncludeFilterPattern.allow_all()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user