mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-06-27 04:22:05 +00:00
Setup.py Refactored, ES port fix (#521)
* Pylint build failure fixed * Setup & dependency modified, Data profiler default to False, ES port fix * Profiler requirements refactored * Setup.py requirement fix * openmetadata-ingestion version upgrade
This commit is contained in:
parent
3937971959
commit
7652baa00d
@ -1,103 +0,0 @@
|
||||
---
|
||||
description: This guide will help you install the Redshift connector and run it manually
|
||||
---
|
||||
|
||||
# Redshift
|
||||
|
||||
{% hint style="info" %}
|
||||
**Prerequisites**
|
||||
|
||||
OpenMetadata is built using Java, DropWizard, Jetty, and MySQL.
|
||||
|
||||
1. Python 3.7 or above
|
||||
{% endhint %}
|
||||
|
||||
### Install from PyPI or Source
|
||||
|
||||
{% tabs %}
|
||||
{% tab title="Install Using PyPI" %}
|
||||
```bash
|
||||
pip install 'openmetadata-ingestion[redshift]'
|
||||
python -m spacy download en_core_web_sm
|
||||
```
|
||||
{% endtab %}
|
||||
{% endtabs %}
|
||||
|
||||
## Run Manually
|
||||
|
||||
```bash
|
||||
metadata ingest -c ./examples/workflows/redshift.json
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
{% code title="redshift.json" %}
|
||||
```javascript
|
||||
{
|
||||
"source": {
|
||||
"type": "redshift",
|
||||
"config": {
|
||||
"host_port": "redshift-cluster-1.clot5cqn1cnb.us-west-2.redshift.amazonaws.com:5439",
|
||||
"username": "awsuser",
|
||||
"password": "focguC-kaqqe5-nepsok",
|
||||
"database": "warehouse",
|
||||
"service_name": "aws_redshift",
|
||||
"filter_pattern": {
|
||||
"excludes": ["information_schema.*", "[\\w]*event_vw.*"]
|
||||
}
|
||||
}
|
||||
},
|
||||
...
|
||||
```
|
||||
{% endcode %}
|
||||
|
||||
1. **username** - pass the Redshift username. We recommend creating a user with read-only permissions to all the databases in your Redshift installation
|
||||
2. **password** - password for the username
|
||||
3. **service\_name** - Service name for this Redshift cluster. If you added the Redshift cluster through the OpenMetadata UI, make sure this service name matches the one used there.
|
||||
4. **filter\_pattern** - Contains the `includes` and `excludes` options, which select by pattern the datasets you want to ingest into OpenMetadata
|
||||
|
||||
## Publish to OpenMetadata
|
||||
|
||||
Below is the configuration to publish Redshift data into the OpenMetadata service.
|
||||
|
||||
Optionally add the `pii` processor and the `metadata-rest-tables` sink, along with the `metadata-server` config
|
||||
|
||||
{% code title="redshift.json" %}
|
||||
```javascript
|
||||
{
|
||||
"source": {
|
||||
"type": "redshift",
|
||||
"config": {
|
||||
"host_port": "redshift-cluster-1.clot5cqn1cnb.us-west-2.redshift.amazonaws.com:5439",
|
||||
"username": "awsuser",
|
||||
"password": "focguC-kaqqe5-nepsok",
|
||||
"database": "warehouse",
|
||||
"service_name": "aws_redshift",
|
||||
"filter_pattern": {
|
||||
"excludes": ["information_schema.*", "[\\w]*event_vw.*"]
|
||||
}
|
||||
}
|
||||
},
|
||||
"sink": {
|
||||
"type": "metadata-rest",
|
||||
"config": {}
|
||||
},
|
||||
"metadata_server": {
|
||||
"type": "metadata-server",
|
||||
"config": {
|
||||
"api_endpoint": "http://localhost:8585/api",
|
||||
"auth_provider_type": "no-auth"
|
||||
}
|
||||
},
|
||||
"cron": {
|
||||
"minute": "*/5",
|
||||
"hour": null,
|
||||
"day": null,
|
||||
"month": null,
|
||||
"day_of_week": null
|
||||
}
|
||||
}
|
||||
|
||||
```
|
||||
{% endcode %}
|
||||
|
@ -1,33 +0,0 @@
|
||||
{
|
||||
"source": {
|
||||
"type": "redshift",
|
||||
"config": {
|
||||
"host_port": "redshift-cluster-1.clot5cqn1cnb.us-west-2.redshift.amazonaws.com:5439",
|
||||
"username": "awsuser",
|
||||
"password": "focguC-kaqqe5-nepsok",
|
||||
"database": "warehouse",
|
||||
"service_name": "aws_redshift",
|
||||
"filter_pattern": {
|
||||
"excludes": ["information_schema.*", "[\\w]*event_vw.*"]
|
||||
}
|
||||
}
|
||||
},
|
||||
"sink": {
|
||||
"type": "metadata-rest",
|
||||
"config": {}
|
||||
},
|
||||
"metadata_server": {
|
||||
"type": "metadata-server",
|
||||
"config": {
|
||||
"api_endpoint": "http://localhost:8585/api",
|
||||
"auth_provider_type": "no-auth"
|
||||
}
|
||||
},
|
||||
"cron": {
|
||||
"minute": "*/5",
|
||||
"hour": null,
|
||||
"day": null,
|
||||
"month": null,
|
||||
"day_of_week": null
|
||||
}
|
||||
}
|
@ -14,7 +14,7 @@
|
||||
"index_topics": "true",
|
||||
"index_dashboards": "true",
|
||||
"es_host": "localhost",
|
||||
"es_port": 9300
|
||||
"es_port": 9200
|
||||
}
|
||||
},
|
||||
"metadata_server": {
|
||||
|
@ -41,9 +41,6 @@ scheduler_requirements = {
|
||||
"simplescheduler@git+git://github.com/open-metadata/simplescheduler.git#egg=simplescheduler"
|
||||
}
|
||||
|
||||
profiler_requirements = {
|
||||
"openmetadata-data-profiler@git+git://github.com/open-metadata/data-profiler.git#egg=openmetadata-data-profiler"
|
||||
}
|
||||
|
||||
base_requirements = {
|
||||
"commonregex",
|
||||
@ -65,10 +62,14 @@ base_requirements = {
|
||||
"okta>=1.7.0",
|
||||
"sqlalchemy>=1.3.24",
|
||||
"sql-metadata~=2.0.0",
|
||||
"spacy==3.0.5",
|
||||
"requests~=2.25.1",
|
||||
"en_core_web_sm@https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz#egg=en_core_web"
|
||||
"requests~=2.25.1"
|
||||
}
|
||||
pii_requirements = {
|
||||
"en_core_web_sm@https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz#egg=en_core_web",
|
||||
"pandas~=1.3.1",
|
||||
"spacy==3.0.5"
|
||||
}
|
||||
|
||||
base_plugins = {
|
||||
"query-parser",
|
||||
"metadata-usage",
|
||||
@ -88,16 +89,16 @@ plugins: Dict[str, Set[str]] = {
|
||||
"mssql-odbc": {"pyodbc"},
|
||||
"mysql": {"pymysql>=1.0.2"},
|
||||
"oracle": {"cx_Oracle"},
|
||||
"pii-processor": {"pandas~=1.3.1"},
|
||||
"pii-processor": pii_requirements,
|
||||
"presto": {"pyhive~=0.6.3"},
|
||||
"postgres": {"pymysql>=1.0.2", "psycopg2-binary", "GeoAlchemy2"},
|
||||
"redshift": {"sqlalchemy-redshift", "GeoAlchemy2", "psycopg2-binary"},
|
||||
"redshift-usage": {"sqlalchemy-redshift", "psycopg2-binary", "GeoAlchemy2"},
|
||||
"scheduler": scheduler_requirements,
|
||||
"data-profiler": profiler_requirements,
|
||||
"data-profiler": {"openmetadata-data-profiler"},
|
||||
"snowflake": {"snowflake-sqlalchemy<=1.2.4"},
|
||||
"snowflake-usage": {"snowflake-sqlalchemy<=1.2.4"},
|
||||
"sample-data": {"faker~=8.1.1"},
|
||||
"sample-data": {"faker~=8.1.1","pandas~=1.3.1"},
|
||||
"superset": {},
|
||||
"tableau": {"tableau-api-lib==0.1.22"},
|
||||
"vertica": {"sqlalchemy-vertica[vertica-python]>=0.0.5"}
|
||||
@ -106,7 +107,7 @@ plugins: Dict[str, Set[str]] = {
|
||||
build_options = {"includes": ["_cffi_backend"]}
|
||||
setup(
|
||||
name="openmetadata-ingestion",
|
||||
version="0.2.2",
|
||||
version="0.3.0",
|
||||
url="https://open-metadata.org/",
|
||||
author="OpenMetadata Committers",
|
||||
license="Apache License 2.0",
|
||||
|
@ -21,7 +21,6 @@ from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type
|
||||
from urllib.parse import quote_plus
|
||||
|
||||
from pydantic import ValidationError
|
||||
from metadata.config.common import ConfigurationError
|
||||
from metadata.generated.schema.entity.services.databaseService import DatabaseServiceType
|
||||
from metadata.ingestion.models.ometa_table_db import OMetaDatabaseAndTable
|
||||
|
||||
@ -78,7 +77,7 @@ class SQLConnectionConfig(ConfigModel):
|
||||
include_views: Optional[bool] = True
|
||||
include_tables: Optional[bool] = True
|
||||
generate_sample_data: Optional[bool] = True
|
||||
data_profiler_enabled: Optional[bool] = True
|
||||
data_profiler_enabled: Optional[bool] = False
|
||||
data_profiler_offset: Optional[int] = 0
|
||||
data_profiler_limit: Optional[int] = 50000
|
||||
filter_pattern: IncludeFilterPattern = IncludeFilterPattern.allow_all()
|
||||
|
Loading…
x
Reference in New Issue
Block a user