From d9cd3e48565560ea7411b8e2995ffa389ff4471b Mon Sep 17 00:00:00 2001 From: Ayush Shah Date: Fri, 13 Aug 2021 02:29:36 +0530 Subject: [PATCH] Connector Dependency Cleanup and Docker Modification --- ingestion/Dockerfile | 5 ++++- ingestion/{pipelines => examples}/bigquery.json | 0 .../{pipelines => examples}/creds/bigquery-cred.json | 0 ingestion/{pipelines => examples}/hive.json | 0 .../ldap_user_to_catalog.json | 0 ingestion/{pipelines => examples}/mssql.json | 0 ingestion/{pipelines => examples}/postgres.json | 0 ingestion/{pipelines => examples}/redshift.json | 0 .../{pipelines => examples}/redshift_usage.json | 0 ingestion/{pipelines => examples}/snowflake.json | 6 +++--- .../{pipelines => examples}/snowflake_usage.json | 0 ingestion/ingestion_dependency.sh | 6 ++++-- .../creds/custom-name-320505-04c223f37dcf.json | 12 ------------ ingestion/setup.py | 2 -- 14 files changed, 11 insertions(+), 20 deletions(-) rename ingestion/{pipelines => examples}/bigquery.json (100%) rename ingestion/{pipelines => examples}/creds/bigquery-cred.json (100%) rename ingestion/{pipelines => examples}/hive.json (100%) rename ingestion/{pipelines => examples}/ldap_user_to_catalog.json (100%) rename ingestion/{pipelines => examples}/mssql.json (100%) rename ingestion/{pipelines => examples}/postgres.json (100%) rename ingestion/{pipelines => examples}/redshift.json (100%) rename ingestion/{pipelines => examples}/redshift_usage.json (100%) rename ingestion/{pipelines => examples}/snowflake.json (85%) rename ingestion/{pipelines => examples}/snowflake_usage.json (100%) delete mode 100644 ingestion/pipelines/creds/custom-name-320505-04c223f37dcf.json diff --git a/ingestion/Dockerfile b/ingestion/Dockerfile index 6fc73276b09..a83d76fbe55 100644 --- a/ingestion/Dockerfile +++ b/ingestion/Dockerfile @@ -2,7 +2,10 @@ FROM python:3.9.2 EXPOSE 7777 -COPY . /openmetadata-ingestion +COPY ./examples /openmetadata-ingestion/examples +COPY ./pipelines /openmetadata-ingestion/pipelines +COPY ./ingestion_scheduler /openmetadata-ingestion/ingestion_scheduler +COPY ./ingestion_dependency.sh /openmetadata-ingestion/ingestion_dependency.sh WORKDIR /openmetadata-ingestion diff --git a/ingestion/pipelines/bigquery.json b/ingestion/examples/bigquery.json similarity index 100% rename from ingestion/pipelines/bigquery.json rename to ingestion/examples/bigquery.json diff --git a/ingestion/pipelines/creds/bigquery-cred.json b/ingestion/examples/creds/bigquery-cred.json similarity index 100% rename from ingestion/pipelines/creds/bigquery-cred.json rename to ingestion/examples/creds/bigquery-cred.json diff --git a/ingestion/pipelines/hive.json b/ingestion/examples/hive.json similarity index 100% rename from ingestion/pipelines/hive.json rename to ingestion/examples/hive.json diff --git a/ingestion/pipelines/ldap_user_to_catalog.json b/ingestion/examples/ldap_user_to_catalog.json similarity index 100% rename from ingestion/pipelines/ldap_user_to_catalog.json rename to ingestion/examples/ldap_user_to_catalog.json diff --git a/ingestion/pipelines/mssql.json b/ingestion/examples/mssql.json similarity index 100% rename from ingestion/pipelines/mssql.json rename to ingestion/examples/mssql.json diff --git a/ingestion/pipelines/postgres.json b/ingestion/examples/postgres.json similarity index 100% rename from ingestion/pipelines/postgres.json rename to ingestion/examples/postgres.json diff --git a/ingestion/pipelines/redshift.json b/ingestion/examples/redshift.json similarity index 100% rename from ingestion/pipelines/redshift.json rename to ingestion/examples/redshift.json diff --git a/ingestion/pipelines/redshift_usage.json b/ingestion/examples/redshift_usage.json similarity index 100% rename from ingestion/pipelines/redshift_usage.json rename to ingestion/examples/redshift_usage.json diff --git a/ingestion/pipelines/snowflake.json b/ingestion/examples/snowflake.json similarity index 85% rename from ingestion/pipelines/snowflake.json rename to ingestion/examples/snowflake.json index 47ac994583a..3075f111ac4 100644 --- a/ingestion/pipelines/snowflake.json +++ b/ingestion/examples/snowflake.json @@ -11,9 +11,9 @@ "service_type": "Snowflake", "include_pattern": { "includes": [ - "(\\w)*.tpcds_sf100tcl.catalog_page", - "(\\w)*.tpcds_sf100tcl.time_dim", - "(\\w)*.tpcds_sf10tcl.catalog_page" + "(\\w)*tpcds_sf100tcl", + "(\\w)*tpcds_sf100tcl", + "(\\w)*tpcds_sf10tcl" ] } } diff --git a/ingestion/pipelines/snowflake_usage.json b/ingestion/examples/snowflake_usage.json similarity index 100% rename from ingestion/pipelines/snowflake_usage.json rename to ingestion/examples/snowflake_usage.json diff --git a/ingestion/ingestion_dependency.sh b/ingestion/ingestion_dependency.sh index 4acaa057693..da0c9490bee 100755 --- a/ingestion/ingestion_dependency.sh +++ b/ingestion/ingestion_dependency.sh @@ -17,5 +17,7 @@ # set -euo pipefail -pip install --upgrade pip setuptools -pip install '.[all]' +pip install --upgrade pip setuptools openmetadata-ingestion==0.2.1 apns +pip install openmetadata-ingestion[mysql,sample-tables,elasticsearch] +python -m spacy download en_core_web_sm +pip install "simplescheduler@git+https://github.com/StreamlineData/sdscheduler.git#egg=simplescheduler" diff --git a/ingestion/pipelines/creds/custom-name-320505-04c223f37dcf.json b/ingestion/pipelines/creds/custom-name-320505-04c223f37dcf.json deleted file mode 100644 index 56d8d295eb5..00000000000 --- a/ingestion/pipelines/creds/custom-name-320505-04c223f37dcf.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "type": "service_account", - "project_id": "custom-name-320505", - "private_key_id": "04c223f37dcf9590a3033f22679e1a74c39776e5", - "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCYC15S0LekClU2\nuixtrvHQtKqnND3SbN5ffluU5JV+DV8XXiuNY/VSDGID7/OJ1FlcryA/leudFWRQ\njjqUa0F7incdoaYTMgPDtvvar7BCyqBUmOnElUODpsgbTx2KmSYCgnFfrex4iDp2\nWt+BcuLnJyS+ZCi4+OdxbVR8M2QK5ipNzgtMfXxTa+sh/pjPAJPI7/Ew1eVsr1Ya\nvzgqFkbMKZ87KkmqUQ10S0JX8UjYe7xB+JuaweccbdxqTYy8txIhAMBjLEfoieZL\nukaFnCslWsCRJeBQ9NyeLrWeKes4GnI/Xu4d1G5uoWUXeiyEXyYGyFxwXP1LJxAf\n2C9tDOYfAgMBAAECggEAC4o6r6zGxhfAraWGGhG//yK58cnjHVWvr8Nz6QJLybmd\nwN91YnG2nMfIKAYDeoUYU+gOLwqjy5Q5kmxRIZMIcSYqcfXNw0GLC++XVbBQVP7I\nb338VDQuCnp9+qX6bIVrifTjl6r9j+JrFDDugNvllpIf1abdeQ3vPGqGEeKGp9Wq\npUPe9+NGVZRX8H9Ff+txzB+tIh+KaZNPRnEOm3yaRVXSvw4SYAGEyj+hoM8YQ1ps\ncOZnFZAK5tOOlkyUwpqXybUEotPdFRoqqf/9SSRyXNENMs22vT3zvtiv9Xl4YJa0\ng1Uk9RZ2zKPurLDSydQ4gH+fXOqsvyOVNN2MDVFuIQKBgQDVfcuBxohPfv4Z+aGD\nebXV1Kb9abrWgfyBX9cWqKVA9GFmi653Yle8a8a2iV6DD0D9ukLSvDdBbwTWW8TZ\nZzGu6lIgzeygqfISHAU4A5EUMa+NU48e69TBfBcg/xbK0mDuNyLQ2OQgx1rn4LMn\n8ErO4RXu1AzpoOsDRtLkI71LIQKBgQC2UXaFUMOGWoo72s1dbVRUSFsbQy5Y+pTe\nTGRxuusJBRNq4Sy/ZdItuybBf11JiGprvTeKIldlxaTtBHa3/aRZkMHnyf4duIUo\npFRlx1+CuZxtvTHNiOA0AGcNzoNFEoaaJuiy65J6bGLsy1ep9M8lBnwugLdwDBsT\nuswReKtJPwKBgH9mezwtgoiWCwLrMhQoXHeHWEOfhayek1WNIOjXqQ4TB9DPCEUb\n/almU4X4gdsKDkmbjmb9hKfanXNh/h2SdFaiZHJJEK1r3RYrfH3Juxjm8ZEPzTex\nmhwAdjjtoOdWFhvx8SfaVoCtFrWLJL5vqeS88367Mh11NumUqSMVpM+hAoGAbHg1\nCoYNfg659Xhs/GvCY06VHbgvh/K1xTgv/lYV5czmS41igREEJf8VxMOQ7eHCZw0x\nUPazhrI7OGSSeU7ni/4Kz2umy2nvl0mFiPRAR9dEUSnhyoXn1ihzJ4y5Hswx1t7V\nTTXIdRutE3K7WjtzTnT4RvZ/rKMXLPrS1IY1aiMCgYEA0Tw5bLYHCh3xyJ+DyWgx\nUDbzYIebZ/OtlH8XndSOH3kdWSumNSLcasWYeGnHyPzdBoaN2PJgw+56Juz0fj98\nw0nsyn5b6rGHZNwmieypE5vSadImGySGDq05b0ZTwgaex52JZPOLgoHhnFdinxDQ\nUgbmMEhft+Tes2JtTxn+8Ak=\n-----END PRIVATE KEY-----\n", - "client_email": "ingestion-bot@custom-name-320505.iam.gserviceaccount.com", - "client_id": "106137785698555844583", - "auth_uri": "https://accounts.google.com/o/oauth2/auth", - "token_uri": "https://oauth2.googleapis.com/token", - "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", - "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/ingestion-bot%40custom-name-320505.iam.gserviceaccount.com" -} diff --git a/ingestion/setup.py b/ingestion/setup.py index 42936bcac81..569df62e98f 100644 --- a/ingestion/setup.py +++ b/ingestion/setup.py @@ -37,9 +37,7 @@ def get_long_description(): base_requirements = { - "elasticsearch>=7.0.0,<8.0.0", "commonregex", - "requests>=2.25.1", "click<7.2.0,>=7.1.1", "expandvars>=0.6.5" "dataclasses>=0.8"