Docker fix (#1101)

* Docker Fix, User Ingestion Fix

* Dockerfile optimized

* ES Docker version rolled back

* Healthcheck removed - incompatible with 3.9

* Docker & Airflow config files updated; review comments resolved

* OpenMetadata managed Airflow APIs support added

* Dockerfile updated

* Dockerfile updated with release package

* Elasticsearch allocated memory increased
Ayush Shah 2021-11-11 10:52:32 +05:30 committed by GitHub
parent 6139a58c9a
commit c95f1542eb
7 changed files with 81 additions and 43 deletions

View File

@@ -25,15 +25,17 @@ services:
     expose:
       - 3306
     networks:
-      app_net:
+      local_app_net:
         ipv4_address: 172.16.239.10
   elasticsearch:
     image: docker.elastic.co/elasticsearch/elasticsearch:7.10.2
     environment:
       - discovery.type=single-node
+      - ES_JAVA_OPTS=-Xms1024m -Xmx1024m
     networks:
-      app_net:
+      local_app_net:
         ipv4_address: 172.16.239.11
     expose:
       - 9200
@@ -57,7 +59,7 @@ services:
     depends_on:
       - mysql
     networks:
-      app_net:
+      local_app_net:
         ipv4_address: 172.16.239.13
     extra_hosts:
       - "localhost:172.16.239.10"
@@ -74,14 +76,14 @@ services:
     ports:
       - 8080:8080
     networks:
-      - app_net
+      - local_app_net
     extra_hosts:
       - "localhost:172.16.239.10"
       - "localhost:172.16.239.11"
       - "localhost:172.16.239.13"
 networks:
-  app_net:
+  local_app_net:
     ipam:
       driver: default
       config:
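
(Not part of the diff.) Besides the network rename, the functional change in this file is the new ES_JAVA_OPTS=-Xms1024m -Xmx1024m setting. A minimal sketch of how one could confirm the bigger heap took effect once the stack is up, assuming Python with the requests package installed and that Elasticsearch's port 9200 is reachable from where you run it; the nodes-info endpoint is standard Elasticsearch, everything else here is illustrative.

import requests

# Query the standard Elasticsearch nodes-info API for JVM details.
resp = requests.get("http://localhost:9200/_nodes/jvm", timeout=10)
resp.raise_for_status()
for node in resp.json()["nodes"].values():
    heap_max_mb = node["jvm"]["mem"]["heap_max_in_bytes"] / (1024 * 1024)
    print(f"{node['name']}: max heap ~{heap_max_mb:.0f} MB")  # expect ~1024 MB after this change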

View File

@@ -25,15 +25,17 @@ services:
       - 3306
     networks:
       app_net:
-        ipv4_address: 172.16.239.10
+        ipv4_address: 172.16.240.10
   elasticsearch:
     image: docker.elastic.co/elasticsearch/elasticsearch:7.10.2
     environment:
       - discovery.type=single-node
+      - ES_JAVA_OPTS=-Xms1024m -Xmx1024m
     networks:
       app_net:
-        ipv4_address: 172.16.239.11
+        ipv4_address: 172.16.240.11
     ports:
       - 9200:9200
       - 9300:9300
@@ -52,10 +54,10 @@ services:
       - mysql
     networks:
       app_net:
-        ipv4_address: 172.16.239.13
+        ipv4_address: 172.16.240.13
     extra_hosts:
-      - "localhost:172.16.239.10"
-      - "elasticsearch:172.16.239.11"
+      - "localhost:172.16.240.10"
+      - "elasticsearch:172.16.240.11"
   ingestion:
     image: openmetadata/ingestion:latest
@@ -68,13 +70,13 @@ services:
     networks:
       - app_net
     extra_hosts:
-      - "localhost:172.16.239.10"
-      - "localhost:172.16.239.11"
-      - "localhost:172.16.239.13"
+      - "localhost:172.16.240.10"
+      - "localhost:172.16.240.11"
+      - "localhost:172.16.240.13"
 networks:
   app_net:
     ipam:
       driver: default
       config:
-        - subnet: "172.16.239.0/24"
+        - subnet: "172.16.240.0/24"
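
(Not part of the diff.) After this change the two compose files sit on different subnets, 172.16.239.0/24 in the first file and 172.16.240.0/24 here, presumably so both stacks can coexist on one Docker host. A quick standard-library check, illustrative only, confirms the ranges no longer overlap.

import ipaddress

local_subnet = ipaddress.ip_network("172.16.239.0/24")      # first compose file
published_subnet = ipaddress.ip_network("172.16.240.0/24")  # this compose file
print(local_subnet.overlaps(published_subnet))  # False: the two stacks no longer collide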

View File

@@ -1,13 +1,24 @@
 FROM python:3.9-slim
 ENV AIRFLOW_HOME=/airflow
 WORKDIR /ingestion
 RUN apt-get update && \
-    apt-get install -y gcc libsasl2-dev curl unixodbc-dev wget --no-install-recommends && \
+    apt-get install -y gcc libsasl2-dev curl build-essential libssl-dev libffi-dev librdkafka-dev unixodbc-dev python3.9-dev libevent-dev wget --no-install-recommends && \
     rm -rf /var/lib/apt/lists/*
-RUN pip install 'openmetadata-ingestion[sample-data,elasticsearch,mysql]' apache-airflow==2.1.4
-COPY ./ingestion /ingestion
-RUN pip install '.[sample-data,elasticsearch,mysql]'
+RUN wget https://github.com/open-metadata/openmetadata-airflow-apis/releases/download/0.1/openmetadata-airflow-apis-plugin.tar.gz
+RUN tar zxvf openmetadata-airflow-apis-plugin.tar.gz
+RUN mkdir /om-airflow
+RUN mv plugins /om-airflow
+ENV AIRFLOW_VERSION=2.2.1
+ENV CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-3.9.txt"
+RUN pip install "apache-airflow==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}"
+COPY ingestion /ingestion
+RUN pip install -e '.[all]' openmetadata-airflow-managed-apis
+RUN airflow db init
+RUN cp -r /ingestion/examples/airflow/airflow.cfg /airflow/airflow.cfg
+RUN cp -r /om-airflow/plugins /airflow/plugins
+RUN cp -r /om-airflow/plugins/dag_templates /airflow/
+RUN mkdir -p /airflow/dag_generated_configs
+RUN cp -r /om-airflow/plugins/dag_managed_operators /airflow/
 RUN chmod 755 ingestion_dependency.sh
 EXPOSE 8080
 CMD [ "./ingestion_dependency.sh" ]
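
(Not part of the diff.) A small sanity check one could run inside the built ingestion image to confirm the Airflow pin and the plugin assets copied above are in place. The paths come straight from the Dockerfile; treating 2.2 as the expected minor version is an assumption based on the AIRFLOW_VERSION value it sets.

from importlib.metadata import version
from pathlib import Path

# The Dockerfile pins apache-airflow==2.2.1 via the constraints file.
assert version("apache-airflow").startswith("2.2"), "unexpected Airflow version"

# Everything copied or created by the Dockerfile should exist under /airflow.
for path in ("/airflow/airflow.cfg",
             "/airflow/plugins",
             "/airflow/dag_templates",
             "/airflow/dag_generated_configs",
             "/airflow/dag_managed_operators"):
    print(path, "ok" if Path(path).exists() else "MISSING")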

View File

@@ -419,6 +419,11 @@ airflow_service_name = local_airflow_3
 openmetadata_api_endpoint = http://localhost:8585/api
 auth_provider_type = no-auth

+[openmetadata_airflow_apis]
+dag_runner_template = /airflow/dag_templates/dag_runner.j2
+dag_generated_configs = /airflow/dag_generated_configs
+dag_managed_operators = /airflow/dag_managed_operators
+
 [atlas]
 sasl_enabled = False
 host =
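
(Not part of the diff.) The new [openmetadata_airflow_apis] section can be read back with the standard library, a cheap way to verify the paths match what the Dockerfile lays out. A sketch assuming the config ends up at /airflow/airflow.cfg as in the Dockerfile above; interpolation is disabled because other airflow.cfg values contain '%'.

import configparser

cfg = configparser.ConfigParser(interpolation=None)  # some airflow.cfg values contain '%'
cfg.read("/airflow/airflow.cfg")
apis = cfg["openmetadata_airflow_apis"]
print(apis["dag_runner_template"])    # /airflow/dag_templates/dag_runner.j2
print(apis["dag_generated_configs"])  # /airflow/dag_generated_configs
print(apis["dag_managed_operators"])  # /airflow/dag_managed_operators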

View File

@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import json
 import pathlib
 from datetime import timedelta
@@ -29,21 +30,45 @@ from metadata.ingestion.api.workflow import Workflow
 default_args = {
     "owner": "user_name",
+    "email": ["username@org.com"],
+    "email_on_failure": False,
     "retries": 3,
     "retry_delay": timedelta(minutes=2),
     "execution_timeout": timedelta(minutes=60),
 }

 config = """
+{
+  "source": {
+    "type": "metadata",
+    "config": {
+      "include_tables": "true",
+      "include_topics": "true",
+      "include_dashboards": "true",
+      "limit_records": 10
+    }
+  },
+  "sink": {
+    "type": "elasticsearch",
+    "config": {
+      "index_tables": "true",
+      "index_topics": "true",
+      "index_dashboards": "true",
+      "es_host": "localhost",
+      "es_port": 9200
+    }
+  },
+  "metadata_server": {
+    "type": "metadata-server",
+    "config": {
+      "api_endpoint": "http://localhost:8585/api",
+      "auth_provider_type": "no-auth"
+    }
+  }
+}
 """

 def metadata_ingestion_workflow():
-    config_file = pathlib.Path("/ingestion/pipelines/metadata_to_es.json")
-    workflow_config = load_config_file(config_file)
+    workflow_config = json.loads(config)
     workflow = Workflow.create(workflow_config)
     workflow.execute()
     workflow.raise_from_status()
@@ -52,7 +77,7 @@ def metadata_ingestion_workflow():
 with DAG(
-    "elasticsearch",
+    "index_metadata",
     default_args=default_args,
     description="An example DAG which runs a OpenMetadata ingestion workflow",
     start_date=days_ago(1),

View File

@@ -12,18 +12,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import json
 import pathlib
 from datetime import timedelta

 from airflow import DAG
-import json

 try:
     from airflow.operators.python import PythonOperator
 except ModuleNotFoundError:
     from airflow.operators.python_operator import PythonOperator
+from airflow.utils.dates import days_ago

 from metadata.config.common import load_config_file
 from metadata.ingestion.api.workflow import Workflow
-from airflow.utils.dates import days_ago

 default_args = {
     "owner": "user_name",
@@ -31,7 +34,7 @@ default_args = {
     "email_on_failure": False,
     "retries": 3,
     "retry_delay": timedelta(minutes=5),
-    "execution_timeout": timedelta(minutes=60)
+    "execution_timeout": timedelta(minutes=60),
 }

 config = """
@@ -72,9 +75,9 @@ config = """
 }
 """

 def metadata_ingestion_workflow():
     workflow_config = json.loads(config)
     workflow = Workflow.create(workflow_config)
     workflow.execute()
     workflow.raise_from_status()
@@ -94,4 +97,4 @@ with DAG(
     ingest_task = PythonOperator(
         task_id="ingest_using_recipe",
         python_callable=metadata_ingestion_workflow,
     )

View File

@@ -17,10 +17,6 @@
 #
 while ! wget -O /dev/null -o /dev/null mysql:3306; do sleep 5; done
-export AIRFLOW_HOME=/airflow
-airflow db init
-echo "AUTH_ROLE_PUBLIC = 'Admin'" >> /airflow/webserver_config.py
-mv /ingestion/examples/airflow/airflow.cfg /airflow/airflow.cfg
 airflow users create \
     --username admin \
     --firstname Peter \
@@ -28,11 +24,5 @@ airflow users create \
     --role Admin \
     --email spiderman@superhero.org \
     --password admin
-airflow webserver --port 8080 -D &
-(sleep 5; airflow db init)
-(sleep 5; airflow db init)
-(sleep 5; curl -u admin:admin --data '{"dag_run_id":"sample_data"}' -H "Content-type: application/json" -X POST http://localhost:8080/api/v1/dags/sample_data/dagRuns) &
-(sleep 7; curl -u admin:admin --data '{"dag_run_id":"sample_users"}' -H "Content-type: application/json" -X POST http://localhost:8080/api/v1/dags/sample_users/dagRuns) &
-(sleep 12; curl -u admin:admin --data '{"dag_run_id":"sample_usage"}' -H "Content-type: application/json" -X POST http://localhost:8080/api/v1/dags/sample_usage/dagRuns) &
-(sleep 17; curl -u admin:admin --data '{"dag_run_id":"elasticsearch"}' -H "Content-type: application/json" -X POST http://localhost:8080/api/v1/dags/elasticsearch/dagRuns) &
-airflow scheduler
+airflow db upgrade
+airflow standalone
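
(Not part of the diff.) The removed curl lines used Airflow's stable REST API to trigger the sample DAGs; with airflow standalone that step is gone, so the same requests can be fired by hand if the old behaviour is still wanted. A minimal Python sketch, assuming the webserver listens on localhost:8080, basic auth with the admin/admin user created above still works, and the DAG ids are the ones visible in this commit (with the elasticsearch DAG renamed to index_metadata).

import requests

# Same endpoint the old curl calls used: POST /api/v1/dags/{dag_id}/dagRuns
for dag_id in ("sample_data", "sample_users", "sample_usage", "index_metadata"):
    resp = requests.post(
        f"http://localhost:8080/api/v1/dags/{dag_id}/dagRuns",
        json={"dag_run_id": f"manual__{dag_id}"},
        auth=("admin", "admin"),
        timeout=30,
    )
    print(dag_id, resp.status_code)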