From c95f1542ebd957bca54bd52e444474138ed5196a Mon Sep 17 00:00:00 2001
From: Ayush Shah
Date: Thu, 11 Nov 2021 10:52:32 +0530
Subject: [PATCH] Docker fix (#1101)

* Docker fix, user ingestion fix

* Dockerfile optimized

* ES docker version rolled back

* Healthcheck removed - incompatible with 3.9

* Docker & Airflow config files updated; review comments resolved

* OpenMetadata managed Airflow APIs support added

* Dockerfile updated

* Dockerfile updated with release package

* Elasticsearch allocated memory increased
---
 docker/local-metadata/docker-compose.yml   | 12 +++---
 docker/metadata/docker-compose.yml         | 20 +++++-----
 ingestion/Dockerfile                       | 23 +++++++++---
 ingestion/examples/airflow/airflow.cfg     |  5 +++
 .../airflow/dags/airflow_metadata_to_es.py | 37 ++++++++++++++++---
 .../airflow/dags/airflow_sample_usage.py   | 13 ++++---
 ingestion/ingestion_dependency.sh          | 14 +------
 7 files changed, 81 insertions(+), 43 deletions(-)

diff --git a/docker/local-metadata/docker-compose.yml b/docker/local-metadata/docker-compose.yml
index 90f45a08c95..251001042d2 100644
--- a/docker/local-metadata/docker-compose.yml
+++ b/docker/local-metadata/docker-compose.yml
@@ -25,15 +25,17 @@ services:
     expose:
       - 3306
     networks:
-      app_net:
+      local_app_net:
         ipv4_address: 172.16.239.10
   elasticsearch:
     image: docker.elastic.co/elasticsearch/elasticsearch:7.10.2
     environment:
       - discovery.type=single-node
+      - ES_JAVA_OPTS=-Xms1024m -Xmx1024m
+
     networks:
-      app_net:
+      local_app_net:
         ipv4_address: 172.16.239.11
     expose:
       - 9200
@@ -57,7 +59,7 @@
     depends_on:
       - mysql
     networks:
-      app_net:
+      local_app_net:
         ipv4_address: 172.16.239.13
     extra_hosts:
       - "localhost:172.16.239.10"
@@ -74,14 +76,14 @@
     ports:
       - 8080:8080
     networks:
-      - app_net
+      - local_app_net
     extra_hosts:
       - "localhost:172.16.239.10"
       - "localhost:172.16.239.11"
       - "localhost:172.16.239.13"
 
 networks:
-  app_net:
+  local_app_net:
     ipam:
       driver: default
       config:
diff --git a/docker/metadata/docker-compose.yml b/docker/metadata/docker-compose.yml
index e8bb4a119a5..9277b3a2146 100644
--- a/docker/metadata/docker-compose.yml
+++ b/docker/metadata/docker-compose.yml
@@ -25,15 +25,17 @@ services:
       - 3306
     networks:
       app_net:
-        ipv4_address: 172.16.239.10
+        ipv4_address: 172.16.240.10
   elasticsearch:
     image: docker.elastic.co/elasticsearch/elasticsearch:7.10.2
     environment:
       - discovery.type=single-node
+      - ES_JAVA_OPTS=-Xms1024m -Xmx1024m
+
     networks:
       app_net:
-        ipv4_address: 172.16.239.11
+        ipv4_address: 172.16.240.11
     ports:
       - 9200:9200
       - 9300:9300
@@ -52,10 +54,10 @@
       - mysql
     networks:
       app_net:
-        ipv4_address: 172.16.239.13
+        ipv4_address: 172.16.240.13
     extra_hosts:
-      - "localhost:172.16.239.10"
-      - "elasticsearch:172.16.239.11"
+      - "localhost:172.16.240.10"
+      - "elasticsearch:172.16.240.11"
 
   ingestion:
     image: openmetadata/ingestion:latest
@@ -68,13 +70,13 @@
     networks:
       - app_net
     extra_hosts:
-      - "localhost:172.16.239.10"
-      - "localhost:172.16.239.11"
-      - "localhost:172.16.239.13"
+      - "localhost:172.16.240.10"
+      - "localhost:172.16.240.11"
+      - "localhost:172.16.240.13"
 
 networks:
   app_net:
     ipam:
       driver: default
       config:
-        - subnet: "172.16.239.0/24"
+        - subnet: "172.16.240.0/24"
diff --git a/ingestion/Dockerfile b/ingestion/Dockerfile
index 550c5ade865..682ec94583f 100644
--- a/ingestion/Dockerfile
+++ b/ingestion/Dockerfile
@@ -1,13 +1,24 @@
 FROM python:3.9-slim
 ENV AIRFLOW_HOME=/airflow
 WORKDIR /ingestion
-
 RUN apt-get update && \
-    apt-get install -y gcc libsasl2-dev curl unixodbc-dev wget --no-install-recommends && \
+    apt-get install -y gcc libsasl2-dev curl build-essential libssl-dev libffi-dev librdkafka-dev unixodbc-dev python3.9-dev libevent-dev wget --no-install-recommends && \
     rm -rf /var/lib/apt/lists/*
-RUN pip install 'openmetadata-ingestion[sample-data,elasticsearch,mysql]' apache-airflow==2.1.4
-COPY ./ingestion /ingestion
-RUN pip install '.[sample-data,elasticsearch,mysql]'
+RUN wget https://github.com/open-metadata/openmetadata-airflow-apis/releases/download/0.1/openmetadata-airflow-apis-plugin.tar.gz
+RUN tar zxvf openmetadata-airflow-apis-plugin.tar.gz
+RUN mkdir /om-airflow
+RUN mv plugins /om-airflow
+ENV AIRFLOW_VERSION=2.2.1
+ENV CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-3.9.txt"
+RUN pip install "apache-airflow==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}"
+COPY ingestion /ingestion
+RUN pip install -e '.[all]' openmetadata-airflow-managed-apis
+RUN airflow db init
+RUN cp -r /ingestion/examples/airflow/airflow.cfg /airflow/airflow.cfg
+RUN cp -r /om-airflow/plugins /airflow/plugins
+RUN cp -r /om-airflow/plugins/dag_templates /airflow/
+RUN mkdir -p /airflow/dag_generated_configs
+RUN cp -r /om-airflow/plugins/dag_managed_operators /airflow/
 RUN chmod 755 ingestion_dependency.sh
 EXPOSE 8080
-CMD [ "./ingestion_dependency.sh" ]
\ No newline at end of file
+CMD [ "./ingestion_dependency.sh" ]
diff --git a/ingestion/examples/airflow/airflow.cfg b/ingestion/examples/airflow/airflow.cfg
index e0f280d05fb..633c34e9286 100644
--- a/ingestion/examples/airflow/airflow.cfg
+++ b/ingestion/examples/airflow/airflow.cfg
@@ -419,6 +419,11 @@ airflow_service_name = local_airflow_3
 openmetadata_api_endpoint = http://localhost:8585/api
 auth_provider_type = no-auth
 
+[openmetadata_airflow_apis]
+dag_runner_template = /airflow/dag_templates/dag_runner.j2
+dag_generated_configs = /airflow/dag_generated_configs
+dag_managed_operators = /airflow/dag_managed_operators
+
 [atlas]
 sasl_enabled = False
 host =
diff --git a/ingestion/examples/airflow/dags/airflow_metadata_to_es.py b/ingestion/examples/airflow/dags/airflow_metadata_to_es.py
index 8746a566387..d5300c3ce81 100644
--- a/ingestion/examples/airflow/dags/airflow_metadata_to_es.py
+++ b/ingestion/examples/airflow/dags/airflow_metadata_to_es.py
@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import json
 import pathlib
 from datetime import timedelta
 
@@ -29,21 +30,45 @@ from metadata.ingestion.api.workflow import Workflow
 
 default_args = {
     "owner": "user_name",
-    "email": ["username@org.com"],
-    "email_on_failure": False,
     "retries": 3,
     "retry_delay": timedelta(minutes=2),
     "execution_timeout": timedelta(minutes=60),
 }
 
 config = """
+{
+    "source": {
+        "type": "metadata",
+        "config": {
+            "include_tables": "true",
+            "include_topics": "true",
+            "include_dashboards": "true",
+            "limit_records": 10
+        }
+    },
+    "sink": {
+        "type": "elasticsearch",
+        "config": {
+            "index_tables": "true",
+            "index_topics": "true",
+            "index_dashboards": "true",
+            "es_host": "localhost",
+            "es_port": 9200
+        }
+    },
+    "metadata_server": {
+        "type": "metadata-server",
+        "config": {
+            "api_endpoint": "http://localhost:8585/api",
+            "auth_provider_type": "no-auth"
+        }
+    }
+}
 """
 
 
 def metadata_ingestion_workflow():
-    config_file = pathlib.Path("/ingestion/pipelines/metadata_to_es.json")
-    workflow_config = load_config_file(config_file)
-
+    workflow_config = json.loads(config)
     workflow = Workflow.create(workflow_config)
     workflow.execute()
     workflow.raise_from_status()
@@ -52,7 +77,7 @@ def metadata_ingestion_workflow():
 
 
 with DAG(
-    "elasticsearch",
+    "index_metadata",
     default_args=default_args,
     description="An example DAG which runs a OpenMetadata ingestion workflow",
     start_date=days_ago(1),
diff --git a/ingestion/examples/airflow/dags/airflow_sample_usage.py b/ingestion/examples/airflow/dags/airflow_sample_usage.py
index a8f59bc18cb..3427e183566 100644
--- a/ingestion/examples/airflow/dags/airflow_sample_usage.py
+++ b/ingestion/examples/airflow/dags/airflow_sample_usage.py
@@ -12,18 +12,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import json
 import pathlib
 from datetime import timedelta
+
 from airflow import DAG
-import json
+
 try:
     from airflow.operators.python import PythonOperator
 except ModuleNotFoundError:
     from airflow.operators.python_operator import PythonOperator
+from airflow.utils.dates import days_ago
+
 from metadata.config.common import load_config_file
 from metadata.ingestion.api.workflow import Workflow
-from airflow.utils.dates import days_ago
 
 default_args = {
     "owner": "user_name",
@@ -31,7 +34,7 @@ default_args = {
     "email_on_failure": False,
     "retries": 3,
     "retry_delay": timedelta(minutes=5),
-    "execution_timeout": timedelta(minutes=60)
+    "execution_timeout": timedelta(minutes=60),
 }
 
 config = """
@@ -72,9 +75,9 @@ config = """
 }
 """
 
+
 def metadata_ingestion_workflow():
     workflow_config = json.loads(config)
-
     workflow = Workflow.create(workflow_config)
     workflow.execute()
     workflow.raise_from_status()
@@ -94,4 +97,4 @@ with DAG(
     ingest_task = PythonOperator(
         task_id="ingest_using_recipe",
         python_callable=metadata_ingestion_workflow,
-    )
\ No newline at end of file
+    )
diff --git a/ingestion/ingestion_dependency.sh b/ingestion/ingestion_dependency.sh
index 96d67487314..6947935b838 100755
--- a/ingestion/ingestion_dependency.sh
+++ b/ingestion/ingestion_dependency.sh
@@ -17,10 +17,6 @@ #
 while ! wget -O /dev/null -o /dev/null mysql:3306; do sleep 5; done
-export AIRFLOW_HOME=/airflow
-airflow db init
-echo "AUTH_ROLE_PUBLIC = 'Admin'" >> /airflow/webserver_config.py
-mv /ingestion/examples/airflow/airflow.cfg /airflow/airflow.cfg
 airflow users create \
     --username admin \
     --firstname Peter \
@@ -28,11 +24,5 @@ airflow users create \
     --role Admin \
     --email spiderman@superhero.org \
     --password admin
-airflow webserver --port 8080 -D &
-(sleep 5; airflow db init)
-(sleep 5; airflow db init)
-(sleep 5; curl -u admin:admin --data '{"dag_run_id":"sample_data"}' -H "Content-type: application/json" -X POST http://localhost:8080/api/v1/dags/sample_data/dagRuns) &
-(sleep 7; curl -u admin:admin --data '{"dag_run_id":"sample_users"}' -H "Content-type: application/json" -X POST http://localhost:8080/api/v1/dags/sample_users/dagRuns) &
-(sleep 12; curl -u admin:admin --data '{"dag_run_id":"sample_usage"}' -H "Content-type: application/json" -X POST http://localhost:8080/api/v1/dags/sample_usage/dagRuns) &
-(sleep 17; curl -u admin:admin --data '{"dag_run_id":"elasticsearch"}' -H "Content-type: application/json" -X POST http://localhost:8080/api/v1/dags/elasticsearch/dagRuns) &
-airflow scheduler
\ No newline at end of file
+airflow db upgrade
+airflow standalone
\ No newline at end of file
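
Note: with the scripted curl triggers removed from ingestion_dependency.sh, `airflow standalone` only brings the services up, and sample DAG runs are started by hand. A minimal sketch of such a manual trigger, mirroring the curl calls this patch removes: the `index_metadata` DAG id and the admin/admin user come from this patch, while the localhost:8080 endpoint and the `manual_index_1` run id are illustrative assumptions for a local docker-compose deployment.

    # Trigger one run of the renamed indexing DAG via Airflow's stable REST API.
    # admin/admin matches the user created by ingestion_dependency.sh above;
    # the dag_run_id value is arbitrary but must be unique per DAG.
    curl -u admin:admin \
         -H "Content-Type: application/json" \
         -X POST \
         --data '{"dag_run_id": "manual_index_1"}' \
         http://localhost:8080/api/v1/dags/index_metadata/dagRuns

If the deployment pauses DAGs at creation (`dags_are_paused_at_creation = True` in airflow.cfg), unpause first with `airflow dags unpause index_metadata`, or the queued run will not start.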