From 5ac6710c9c03fc62ef16612bd5f6903efa5ba3a0 Mon Sep 17 00:00:00 2001 From: Ayush Shah Date: Wed, 20 Oct 2021 00:19:08 +0530 Subject: [PATCH] Pydantic fix, Docker update (#860) * Setup.py Modified with openmetadata-airflow package, docker update * Setup.py Modified * Update setup.py --- docker/metadata/docker-compose.yml | 25 ++++++++--- docker/metadata/openmetadata-start.sh | 2 +- ingestion/Dockerfile | 22 ++++------ ingestion/setup.py | 61 +++++++++++++++------------ ingestion/src/setup.py | 15 +++++++ 5 files changed, 76 insertions(+), 49 deletions(-) create mode 100644 ingestion/src/setup.py diff --git a/docker/metadata/docker-compose.yml b/docker/metadata/docker-compose.yml index 8a6310746fc..d8c4c5ff291 100644 --- a/docker/metadata/docker-compose.yml +++ b/docker/metadata/docker-compose.yml @@ -34,8 +34,6 @@ services: image: docker.elastic.co/elasticsearch/elasticsearch:7.10.2 environment: - discovery.type=single-node - depends_on: - - openmetadata-server networks: app_net: ipv4_address: 172.16.239.11 @@ -43,6 +41,20 @@ services: - 9200:9200 - 9300:9300 + db2: + extends: db + restart: always + environment: + MYSQL_ROOT_PASSWORD: password + MYSQL_USER: airflow_user + MYSQL_PASSWORD: airflow_pass + MYSQL_DATABASE: airflow_db + expose: + - 3306 + networks: + app_net: + ipv4_address: 172.16.239.12 + openmetadata-server: restart: always image: openmetadata/server:latest @@ -57,7 +69,7 @@ services: - db networks: app_net: - ipv4_address: 172.16.239.12 + ipv4_address: 172.16.239.13 extra_hosts: - "localhost:172.16.239.10" - "elasticsearch:172.16.239.11" @@ -65,15 +77,16 @@ services: ingestion: image: openmetadata/ingestion:latest expose: - - 7777 + - 8080 ports: - - 7777:7777 + - 8080:8080 networks: - app_net extra_hosts: - "localhost:172.16.239.10" - "localhost:172.16.239.11" - - "localhost:172.16.239.12" + - "airflow:172.16.239.12" + - "localhost:172.16.239.13" networks: app_net: diff --git a/docker/metadata/openmetadata-start.sh b/docker/metadata/openmetadata-start.sh index a7e5f00441f..de4536279ec 100644 --- a/docker/metadata/openmetadata-start.sh +++ b/docker/metadata/openmetadata-start.sh @@ -16,7 +16,7 @@ # limitations under the License. # -while ! curl -o - localhost:3306; do sleep 5; done +while ! wget -O /dev/null -o /dev/null localhost:3306; do sleep 5; done cp /openmetadata.yaml /openmetadata-*/conf/openmetadata.yaml cd /openmetadata-*/ ./bootstrap/bootstrap_storage.sh migrate diff --git a/ingestion/Dockerfile b/ingestion/Dockerfile index 63555eb761e..1be1619be9a 100644 --- a/ingestion/Dockerfile +++ b/ingestion/Dockerfile @@ -1,19 +1,13 @@ -FROM python:3.8.10 - -EXPOSE 7777 - -COPY ./pipelines /openmetadata-ingestion/pipelines -COPY ./ingestion_scheduler /openmetadata-ingestion/ingestion_scheduler -COPY ./ingestion_dependency.sh /openmetadata-ingestion/ingestion_dependency.sh -COPY ./examples /openmetadata-ingestion/examples -WORKDIR /openmetadata-ingestion +FROM python:3.9-slim RUN apt-get update && \ apt-get install -y gcc libsasl2-dev unixodbc-dev --no-install-recommends && \ rm -rf /var/lib/apt/lists/* +RUN pip install 'openmetadata-ingestion[sample-data,elasticsearch,mysql]' apache-airflow==2.1.4 +ENV AIRFLOW_HOME=/airflow +COPY . /ingestion +WORKDIR /ingestion +RUN pip install -e '.[sample-data,elasticsearch,mysql]' +RUN chmod 755 ingestion_dependency.sh -RUN chmod 777 ingestion_dependency.sh - -RUN ./ingestion_dependency.sh - -CMD ["python","ingestion_scheduler/scheduler.py"] +CMD [ "./ingestion_dependency.sh" ] \ No newline at end of file diff --git a/ingestion/setup.py b/ingestion/setup.py index b84c7e4a2e4..8f61094d42b 100644 --- a/ingestion/setup.py +++ b/ingestion/setup.py @@ -13,10 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, Set import os +from typing import Dict, Set -from setuptools import setup, find_namespace_packages +from setuptools import find_namespace_packages, setup def get_version(): @@ -36,7 +36,6 @@ def get_long_description(): return description - base_requirements = { "commonregex", "idna<3,>=2.5", @@ -46,7 +45,7 @@ base_requirements = { "typing_extensions>=3.7.4" "mypy_extensions>=0.4.3", "typing-inspect", - "pydantic==1.8.2", + "pydantic==1.7.4", "pydantic[email]>=1.7.2", "google>=3.0.0", "google-auth>=1.33.0", @@ -57,19 +56,19 @@ base_requirements = { "sqlalchemy>=1.3.24", "sql-metadata~=2.0.0", "requests~=2.26", - "PyYAML" + "PyYAML", } pii_requirements = { "en_core_web_sm@https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz#egg=en_core_web", "pandas~=1.3.1", - "spacy==3.0.5" + "spacy==3.0.5", } report_requirements = { "asgiref==3.4.1", "Django==3.2.7", "pytz==2021.1", - "sqlparse==0.4.2" + "sqlparse==0.4.2", } base_plugins = { @@ -83,7 +82,12 @@ plugins: Dict[str, Set[str]] = { "bigquery": {"openmetadata-sqlalchemy-bigquery==0.2.0"}, "bigquery-usage": {"google-cloud-logging", "cachetools"}, "elasticsearch": {"elasticsearch~=7.13.1"}, - "hive": {"openmetadata-sqlalchemy-hive==0.2.0", "thrift~=0.13.0", "sasl==0.3.1", "thrift-sasl==0.4.3"}, + "hive": { + "openmetadata-sqlalchemy-hive==0.2.0", + "thrift~=0.13.0", + "sasl==0.3.1", + "thrift-sasl==0.4.3", + }, "kafka": {"confluent_kafka>=1.5.0", "fastavro>=1.2.0"}, "ldap-users": {"ldap3==2.9.1"}, "looker": {"looker-sdk==21.12.2"}, @@ -96,8 +100,16 @@ plugins: Dict[str, Set[str]] = { "trino": {"sqlalchemy-trino"}, "postgres": {"pymysql>=1.0.2", "psycopg2-binary", "GeoAlchemy2"}, "redash": {"redash-toolbelt==0.1.4"}, - "redshift": {"openmetadata-sqlalchemy-redshift==0.2.1", "psycopg2-binary", "GeoAlchemy2"}, - "redshift-usage": {"openmetadata-sqlalchemy-redshift==0.2.1", "psycopg2-binary", "GeoAlchemy2"}, + "redshift": { + "openmetadata-sqlalchemy-redshift==0.2.1", + "psycopg2-binary", + "GeoAlchemy2", + }, + "redshift-usage": { + "openmetadata-sqlalchemy-redshift==0.2.1", + "psycopg2-binary", + "GeoAlchemy2", + }, "data-profiler": {"openmetadata-data-profiler"}, "snowflake": {"snowflake-sqlalchemy<=1.2.4"}, "snowflake-usage": {"snowflake-sqlalchemy<=1.2.4"}, @@ -106,13 +118,13 @@ plugins: Dict[str, Set[str]] = { "tableau": {"tableau-api-lib==0.1.22"}, "vertica": {"sqlalchemy-vertica[vertica-python]>=0.0.5"}, "report-server": report_requirements, - "airflow": {"apache-airflow >= 1.10.2"} + "airflow": {"apache-airflow >= 1.10.2", "openmetadata-airflow==1.0"}, } build_options = {"includes": ["_cffi_backend"]} setup( name="openmetadata-ingestion", - version="0.3.2", + version="0.4.0", url="https://open-metadata.org/", author="OpenMetadata Committers", license="Apache License 2.0", @@ -123,33 +135,26 @@ setup( options={"build_exe": build_options}, package_dir={"": "src"}, zip_safe=False, - dependency_links=[ - - ], + dependency_links=[], project_urls={ "Documentation": "https://docs.open-metadata.org/", "Source": "https://github.com/open-metadata/OpenMetadata", }, - packages=find_namespace_packages(where='./src', exclude=['tests*']), + packages=find_namespace_packages(where="./src", exclude=["tests*"]), entry_points={ "console_scripts": ["metadata = metadata.cmd:metadata"], - "apache_airflow_provider": ["provider_info = airflow_provider_openmetadata:get_provider_config"], + "apache_airflow_provider": [ + "provider_info = airflow_provider_openmetadata:get_provider_config" + ], }, install_requires=list(base_requirements), extras_require={ "base": list(base_requirements), - **{ - plugin: list(dependencies) - for (plugin, dependencies) in plugins.items() - }, + **{plugin: list(dependencies) for (plugin, dependencies) in plugins.items()}, "all": list( base_requirements.union( - *[ - requirements - for plugin, requirements in plugins.items() - ] + *[requirements for plugin, requirements in plugins.items()] ) - ) - } - + ), + }, ) diff --git a/ingestion/src/setup.py b/ingestion/src/setup.py new file mode 100644 index 00000000000..84c2039a2b7 --- /dev/null +++ b/ingestion/src/setup.py @@ -0,0 +1,15 @@ +from setuptools import find_namespace_packages, setup + +setup( + name="openmetadata-airflow", + version="1.0.dev0", + description="Python Distribution Utilities", + packages=find_namespace_packages( + where="./apache_airflow_provider", exclude=["tests*"] + ), + entry_points={ + "apache_airflow_provider": [ + "provider_info = airflow_provider_openmetadata:get_provider_config" + ], + }, +)