mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-09-02 13:43:22 +00:00
Docker fix (#1101)
* Docker fix, user ingestion fix * Dockerfile optimized * ES Docker version rolled back * Healthcheck removed - incompatible with 3.9 * Docker & Airflow config files updated to resolve review comments * OpenMetadata-managed Airflow APIs support added * Dockerfile updated * Dockerfile updated with release package * Elasticsearch allocated memory increased
This commit is contained in:
parent
6139a58c9a
commit
c95f1542eb
@ -25,15 +25,17 @@ services:
|
||||
expose:
|
||||
- 3306
|
||||
networks:
|
||||
app_net:
|
||||
local_app_net:
|
||||
ipv4_address: 172.16.239.10
|
||||
|
||||
elasticsearch:
|
||||
image: docker.elastic.co/elasticsearch/elasticsearch:7.10.2
|
||||
environment:
|
||||
- discovery.type=single-node
|
||||
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m
|
||||
|
||||
networks:
|
||||
app_net:
|
||||
local_app_net:
|
||||
ipv4_address: 172.16.239.11
|
||||
expose:
|
||||
- 9200
|
||||
@ -57,7 +59,7 @@ services:
|
||||
depends_on:
|
||||
- mysql
|
||||
networks:
|
||||
app_net:
|
||||
local_app_net:
|
||||
ipv4_address: 172.16.239.13
|
||||
extra_hosts:
|
||||
- "localhost:172.16.239.10"
|
||||
@ -74,14 +76,14 @@ services:
|
||||
ports:
|
||||
- 8080:8080
|
||||
networks:
|
||||
- app_net
|
||||
- local_app_net
|
||||
extra_hosts:
|
||||
- "localhost:172.16.239.10"
|
||||
- "localhost:172.16.239.11"
|
||||
- "localhost:172.16.239.13"
|
||||
|
||||
networks:
|
||||
app_net:
|
||||
local_app_net:
|
||||
ipam:
|
||||
driver: default
|
||||
config:
|
||||
|
@ -25,15 +25,17 @@ services:
|
||||
- 3306
|
||||
networks:
|
||||
app_net:
|
||||
ipv4_address: 172.16.239.10
|
||||
ipv4_address: 172.16.240.10
|
||||
|
||||
elasticsearch:
|
||||
image: docker.elastic.co/elasticsearch/elasticsearch:7.10.2
|
||||
environment:
|
||||
- discovery.type=single-node
|
||||
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m
|
||||
|
||||
networks:
|
||||
app_net:
|
||||
ipv4_address: 172.16.239.11
|
||||
ipv4_address: 172.16.240.11
|
||||
ports:
|
||||
- 9200:9200
|
||||
- 9300:9300
|
||||
@ -52,10 +54,10 @@ services:
|
||||
- mysql
|
||||
networks:
|
||||
app_net:
|
||||
ipv4_address: 172.16.239.13
|
||||
ipv4_address: 172.16.240.13
|
||||
extra_hosts:
|
||||
- "localhost:172.16.239.10"
|
||||
- "elasticsearch:172.16.239.11"
|
||||
- "localhost:172.16.240.10"
|
||||
- "elasticsearch:172.16.240.11"
|
||||
|
||||
ingestion:
|
||||
image: openmetadata/ingestion:latest
|
||||
@ -68,13 +70,13 @@ services:
|
||||
networks:
|
||||
- app_net
|
||||
extra_hosts:
|
||||
- "localhost:172.16.239.10"
|
||||
- "localhost:172.16.239.11"
|
||||
- "localhost:172.16.239.13"
|
||||
- "localhost:172.16.240.10"
|
||||
- "localhost:172.16.240.11"
|
||||
- "localhost:172.16.240.13"
|
||||
|
||||
networks:
|
||||
app_net:
|
||||
ipam:
|
||||
driver: default
|
||||
config:
|
||||
- subnet: "172.16.239.0/24"
|
||||
- subnet: "172.16.240.0/24"
|
||||
|
@ -1,13 +1,24 @@
|
||||
FROM python:3.9-slim
|
||||
ENV AIRFLOW_HOME=/airflow
|
||||
WORKDIR /ingestion
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y gcc libsasl2-dev curl unixodbc-dev wget --no-install-recommends && \
|
||||
apt-get install -y gcc libsasl2-dev curl build-essential libssl-dev libffi-dev librdkafka-dev unixodbc-dev python3.9-dev libevent-dev wget --no-install-recommends && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
RUN pip install 'openmetadata-ingestion[sample-data,elasticsearch,mysql]' apache-airflow==2.1.4
|
||||
COPY ./ingestion /ingestion
|
||||
RUN pip install '.[sample-data,elasticsearch,mysql]'
|
||||
RUN wget https://github.com/open-metadata/openmetadata-airflow-apis/releases/download/0.1/openmetadata-airflow-apis-plugin.tar.gz
|
||||
RUN tar zxvf openmetadata-airflow-apis-plugin.tar.gz
|
||||
RUN mkdir /om-airflow
|
||||
RUN mv plugins /om-airflow
|
||||
ENV AIRFLOW_VERSION=2.2.1
|
||||
ENV CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-3.9.txt"
|
||||
RUN pip install "apache-airflow==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}"
|
||||
COPY ingestion /ingestion
|
||||
RUN pip install -e '.[all]' openmetadata-airflow-managed-apis
|
||||
RUN airflow db init
|
||||
RUN cp -r /ingestion/examples/airflow/airflow.cfg /airflow/airflow.cfg
|
||||
RUN cp -r /om-airflow/plugins /airflow/plugins
|
||||
RUN cp -r /om-airflow/plugins/dag_templates /airflow/
|
||||
RUN mkdir -p /airflow/dag_generated_configs
|
||||
RUN cp -r /om-airflow/plugins/dag_managed_operators /airflow/
|
||||
RUN chmod 755 ingestion_dependency.sh
|
||||
EXPOSE 8080
|
||||
CMD [ "./ingestion_dependency.sh" ]
|
||||
CMD [ "./ingestion_dependency.sh" ]
|
||||
|
@ -419,6 +419,11 @@ airflow_service_name = local_airflow_3
|
||||
openmetadata_api_endpoint = http://localhost:8585/api
|
||||
auth_provider_type = no-auth
|
||||
|
||||
[openmetadata_airflow_apis]
|
||||
dag_runner_template = /airflow/dag_templates/dag_runner.j2
|
||||
dag_generated_configs = /airflow/dag_generated_configs
|
||||
dag_managed_operators = /airflow/dag_managed_operators
|
||||
|
||||
[atlas]
|
||||
sasl_enabled = False
|
||||
host =
|
||||
|
@ -12,6 +12,7 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import json
|
||||
import pathlib
|
||||
from datetime import timedelta
|
||||
|
||||
@ -29,21 +30,45 @@ from metadata.ingestion.api.workflow import Workflow
|
||||
|
||||
default_args = {
|
||||
"owner": "user_name",
|
||||
"email": ["username@org.com"],
|
||||
"email_on_failure": False,
|
||||
"retries": 3,
|
||||
"retry_delay": timedelta(minutes=2),
|
||||
"execution_timeout": timedelta(minutes=60),
|
||||
}
|
||||
|
||||
config = """
|
||||
{
|
||||
"source": {
|
||||
"type": "metadata",
|
||||
"config": {
|
||||
"include_tables": "true",
|
||||
"include_topics": "true",
|
||||
"include_dashboards": "true",
|
||||
"limit_records": 10
|
||||
}
|
||||
},
|
||||
"sink": {
|
||||
"type": "elasticsearch",
|
||||
"config": {
|
||||
"index_tables": "true",
|
||||
"index_topics": "true",
|
||||
"index_dashboards": "true",
|
||||
"es_host": "localhost",
|
||||
"es_port": 9200
|
||||
}
|
||||
},
|
||||
"metadata_server": {
|
||||
"type": "metadata-server",
|
||||
"config": {
|
||||
"api_endpoint": "http://localhost:8585/api",
|
||||
"auth_provider_type": "no-auth"
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
def metadata_ingestion_workflow():
|
||||
config_file = pathlib.Path("/ingestion/pipelines/metadata_to_es.json")
|
||||
workflow_config = load_config_file(config_file)
|
||||
|
||||
workflow_config = json.loads(config)
|
||||
workflow = Workflow.create(workflow_config)
|
||||
workflow.execute()
|
||||
workflow.raise_from_status()
|
||||
@ -52,7 +77,7 @@ def metadata_ingestion_workflow():
|
||||
|
||||
|
||||
with DAG(
|
||||
"elasticsearch",
|
||||
"index_metadata",
|
||||
default_args=default_args,
|
||||
description="An example DAG which runs a OpenMetadata ingestion workflow",
|
||||
start_date=days_ago(1),
|
||||
|
@ -12,18 +12,21 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import json
|
||||
import pathlib
|
||||
from datetime import timedelta
|
||||
|
||||
from airflow import DAG
|
||||
import json
|
||||
|
||||
try:
|
||||
from airflow.operators.python import PythonOperator
|
||||
except ModuleNotFoundError:
|
||||
from airflow.operators.python_operator import PythonOperator
|
||||
|
||||
from airflow.utils.dates import days_ago
|
||||
|
||||
from metadata.config.common import load_config_file
|
||||
from metadata.ingestion.api.workflow import Workflow
|
||||
from airflow.utils.dates import days_ago
|
||||
|
||||
default_args = {
|
||||
"owner": "user_name",
|
||||
@ -31,7 +34,7 @@ default_args = {
|
||||
"email_on_failure": False,
|
||||
"retries": 3,
|
||||
"retry_delay": timedelta(minutes=5),
|
||||
"execution_timeout": timedelta(minutes=60)
|
||||
"execution_timeout": timedelta(minutes=60),
|
||||
}
|
||||
|
||||
config = """
|
||||
@ -72,9 +75,9 @@ config = """
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
def metadata_ingestion_workflow():
|
||||
workflow_config = json.loads(config)
|
||||
|
||||
workflow = Workflow.create(workflow_config)
|
||||
workflow.execute()
|
||||
workflow.raise_from_status()
|
||||
@ -94,4 +97,4 @@ with DAG(
|
||||
ingest_task = PythonOperator(
|
||||
task_id="ingest_using_recipe",
|
||||
python_callable=metadata_ingestion_workflow,
|
||||
)
|
||||
)
|
||||
|
@ -17,10 +17,6 @@
|
||||
#
|
||||
|
||||
while ! wget -O /dev/null -o /dev/null mysql:3306; do sleep 5; done
|
||||
export AIRFLOW_HOME=/airflow
|
||||
airflow db init
|
||||
echo "AUTH_ROLE_PUBLIC = 'Admin'" >> /airflow/webserver_config.py
|
||||
mv /ingestion/examples/airflow/airflow.cfg /airflow/airflow.cfg
|
||||
airflow users create \
|
||||
--username admin \
|
||||
--firstname Peter \
|
||||
@ -28,11 +24,5 @@ airflow users create \
|
||||
--role Admin \
|
||||
--email spiderman@superhero.org \
|
||||
--password admin
|
||||
airflow webserver --port 8080 -D &
|
||||
(sleep 5; airflow db init)
|
||||
(sleep 5; airflow db init)
|
||||
(sleep 5; curl -u admin:admin --data '{"dag_run_id":"sample_data"}' -H "Content-type: application/json" -X POST http://localhost:8080/api/v1/dags/sample_data/dagRuns) &
|
||||
(sleep 7; curl -u admin:admin --data '{"dag_run_id":"sample_users"}' -H "Content-type: application/json" -X POST http://localhost:8080/api/v1/dags/sample_users/dagRuns) &
|
||||
(sleep 12; curl -u admin:admin --data '{"dag_run_id":"sample_usage"}' -H "Content-type: application/json" -X POST http://localhost:8080/api/v1/dags/sample_usage/dagRuns) &
|
||||
(sleep 17; curl -u admin:admin --data '{"dag_run_id":"elasticsearch"}' -H "Content-type: application/json" -X POST http://localhost:8080/api/v1/dags/elasticsearch/dagRuns) &
|
||||
airflow scheduler
|
||||
airflow db upgrade
|
||||
airflow standalone
|
Loading…
x
Reference in New Issue
Block a user