mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-09-02 13:43:22 +00:00
Docker fix (#1101)
* Docker fix, user ingestion fix * Dockerfile optimized * ES Docker version rolled back * Healthcheck removed - incompatible with 3.9 * Docker & Airflow config files updated - comments resolved * OpenMetadata-managed Airflow APIs support added * Dockerfile updated * Dockerfile updated with release package * Elasticsearch allocated memory increased
This commit is contained in:
parent
6139a58c9a
commit
c95f1542eb
@ -25,15 +25,17 @@ services:
|
|||||||
expose:
|
expose:
|
||||||
- 3306
|
- 3306
|
||||||
networks:
|
networks:
|
||||||
app_net:
|
local_app_net:
|
||||||
ipv4_address: 172.16.239.10
|
ipv4_address: 172.16.239.10
|
||||||
|
|
||||||
elasticsearch:
|
elasticsearch:
|
||||||
image: docker.elastic.co/elasticsearch/elasticsearch:7.10.2
|
image: docker.elastic.co/elasticsearch/elasticsearch:7.10.2
|
||||||
environment:
|
environment:
|
||||||
- discovery.type=single-node
|
- discovery.type=single-node
|
||||||
|
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
app_net:
|
local_app_net:
|
||||||
ipv4_address: 172.16.239.11
|
ipv4_address: 172.16.239.11
|
||||||
expose:
|
expose:
|
||||||
- 9200
|
- 9200
|
||||||
@ -57,7 +59,7 @@ services:
|
|||||||
depends_on:
|
depends_on:
|
||||||
- mysql
|
- mysql
|
||||||
networks:
|
networks:
|
||||||
app_net:
|
local_app_net:
|
||||||
ipv4_address: 172.16.239.13
|
ipv4_address: 172.16.239.13
|
||||||
extra_hosts:
|
extra_hosts:
|
||||||
- "localhost:172.16.239.10"
|
- "localhost:172.16.239.10"
|
||||||
@ -74,14 +76,14 @@ services:
|
|||||||
ports:
|
ports:
|
||||||
- 8080:8080
|
- 8080:8080
|
||||||
networks:
|
networks:
|
||||||
- app_net
|
- local_app_net
|
||||||
extra_hosts:
|
extra_hosts:
|
||||||
- "localhost:172.16.239.10"
|
- "localhost:172.16.239.10"
|
||||||
- "localhost:172.16.239.11"
|
- "localhost:172.16.239.11"
|
||||||
- "localhost:172.16.239.13"
|
- "localhost:172.16.239.13"
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
app_net:
|
local_app_net:
|
||||||
ipam:
|
ipam:
|
||||||
driver: default
|
driver: default
|
||||||
config:
|
config:
|
||||||
|
@ -25,15 +25,17 @@ services:
|
|||||||
- 3306
|
- 3306
|
||||||
networks:
|
networks:
|
||||||
app_net:
|
app_net:
|
||||||
ipv4_address: 172.16.239.10
|
ipv4_address: 172.16.240.10
|
||||||
|
|
||||||
elasticsearch:
|
elasticsearch:
|
||||||
image: docker.elastic.co/elasticsearch/elasticsearch:7.10.2
|
image: docker.elastic.co/elasticsearch/elasticsearch:7.10.2
|
||||||
environment:
|
environment:
|
||||||
- discovery.type=single-node
|
- discovery.type=single-node
|
||||||
|
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
app_net:
|
app_net:
|
||||||
ipv4_address: 172.16.239.11
|
ipv4_address: 172.16.240.11
|
||||||
ports:
|
ports:
|
||||||
- 9200:9200
|
- 9200:9200
|
||||||
- 9300:9300
|
- 9300:9300
|
||||||
@ -52,10 +54,10 @@ services:
|
|||||||
- mysql
|
- mysql
|
||||||
networks:
|
networks:
|
||||||
app_net:
|
app_net:
|
||||||
ipv4_address: 172.16.239.13
|
ipv4_address: 172.16.240.13
|
||||||
extra_hosts:
|
extra_hosts:
|
||||||
- "localhost:172.16.239.10"
|
- "localhost:172.16.240.10"
|
||||||
- "elasticsearch:172.16.239.11"
|
- "elasticsearch:172.16.240.11"
|
||||||
|
|
||||||
ingestion:
|
ingestion:
|
||||||
image: openmetadata/ingestion:latest
|
image: openmetadata/ingestion:latest
|
||||||
@ -68,13 +70,13 @@ services:
|
|||||||
networks:
|
networks:
|
||||||
- app_net
|
- app_net
|
||||||
extra_hosts:
|
extra_hosts:
|
||||||
- "localhost:172.16.239.10"
|
- "localhost:172.16.240.10"
|
||||||
- "localhost:172.16.239.11"
|
- "localhost:172.16.240.11"
|
||||||
- "localhost:172.16.239.13"
|
- "localhost:172.16.240.13"
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
app_net:
|
app_net:
|
||||||
ipam:
|
ipam:
|
||||||
driver: default
|
driver: default
|
||||||
config:
|
config:
|
||||||
- subnet: "172.16.239.0/24"
|
- subnet: "172.16.240.0/24"
|
||||||
|
@ -1,13 +1,24 @@
|
|||||||
FROM python:3.9-slim
|
FROM python:3.9-slim
|
||||||
ENV AIRFLOW_HOME=/airflow
|
ENV AIRFLOW_HOME=/airflow
|
||||||
WORKDIR /ingestion
|
WORKDIR /ingestion
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y gcc libsasl2-dev curl unixodbc-dev wget --no-install-recommends && \
|
apt-get install -y gcc libsasl2-dev curl build-essential libssl-dev libffi-dev librdkafka-dev unixodbc-dev python3.9-dev libevent-dev wget --no-install-recommends && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
RUN pip install 'openmetadata-ingestion[sample-data,elasticsearch,mysql]' apache-airflow==2.1.4
|
RUN wget https://github.com/open-metadata/openmetadata-airflow-apis/releases/download/0.1/openmetadata-airflow-apis-plugin.tar.gz
|
||||||
COPY ./ingestion /ingestion
|
RUN tar zxvf openmetadata-airflow-apis-plugin.tar.gz
|
||||||
RUN pip install '.[sample-data,elasticsearch,mysql]'
|
RUN mkdir /om-airflow
|
||||||
|
RUN mv plugins /om-airflow
|
||||||
|
ENV AIRFLOW_VERSION=2.2.1
|
||||||
|
ENV CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-3.9.txt"
|
||||||
|
RUN pip install "apache-airflow==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}"
|
||||||
|
COPY ingestion /ingestion
|
||||||
|
RUN pip install -e '.[all]' openmetadata-airflow-managed-apis
|
||||||
|
RUN airflow db init
|
||||||
|
RUN cp -r /ingestion/examples/airflow/airflow.cfg /airflow/airflow.cfg
|
||||||
|
RUN cp -r /om-airflow/plugins /airflow/plugins
|
||||||
|
RUN cp -r /om-airflow/plugins/dag_templates /airflow/
|
||||||
|
RUN mkdir -p /airflow/dag_generated_configs
|
||||||
|
RUN cp -r /om-airflow/plugins/dag_managed_operators /airflow/
|
||||||
RUN chmod 755 ingestion_dependency.sh
|
RUN chmod 755 ingestion_dependency.sh
|
||||||
EXPOSE 8080
|
EXPOSE 8080
|
||||||
CMD [ "./ingestion_dependency.sh" ]
|
CMD [ "./ingestion_dependency.sh" ]
|
||||||
|
@ -419,6 +419,11 @@ airflow_service_name = local_airflow_3
|
|||||||
openmetadata_api_endpoint = http://localhost:8585/api
|
openmetadata_api_endpoint = http://localhost:8585/api
|
||||||
auth_provider_type = no-auth
|
auth_provider_type = no-auth
|
||||||
|
|
||||||
|
[openmetadata_airflow_apis]
|
||||||
|
dag_runner_template = /airflow/dag_templates/dag_runner.j2
|
||||||
|
dag_generated_configs = /airflow/dag_generated_configs
|
||||||
|
dag_managed_operators = /airflow/dag_managed_operators
|
||||||
|
|
||||||
[atlas]
|
[atlas]
|
||||||
sasl_enabled = False
|
sasl_enabled = False
|
||||||
host =
|
host =
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
import json
|
||||||
import pathlib
|
import pathlib
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
|
|
||||||
@ -29,21 +30,45 @@ from metadata.ingestion.api.workflow import Workflow
|
|||||||
|
|
||||||
default_args = {
|
default_args = {
|
||||||
"owner": "user_name",
|
"owner": "user_name",
|
||||||
"email": ["username@org.com"],
|
|
||||||
"email_on_failure": False,
|
|
||||||
"retries": 3,
|
"retries": 3,
|
||||||
"retry_delay": timedelta(minutes=2),
|
"retry_delay": timedelta(minutes=2),
|
||||||
"execution_timeout": timedelta(minutes=60),
|
"execution_timeout": timedelta(minutes=60),
|
||||||
}
|
}
|
||||||
|
|
||||||
config = """
|
config = """
|
||||||
|
{
|
||||||
|
"source": {
|
||||||
|
"type": "metadata",
|
||||||
|
"config": {
|
||||||
|
"include_tables": "true",
|
||||||
|
"include_topics": "true",
|
||||||
|
"include_dashboards": "true",
|
||||||
|
"limit_records": 10
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"sink": {
|
||||||
|
"type": "elasticsearch",
|
||||||
|
"config": {
|
||||||
|
"index_tables": "true",
|
||||||
|
"index_topics": "true",
|
||||||
|
"index_dashboards": "true",
|
||||||
|
"es_host": "localhost",
|
||||||
|
"es_port": 9200
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"metadata_server": {
|
||||||
|
"type": "metadata-server",
|
||||||
|
"config": {
|
||||||
|
"api_endpoint": "http://localhost:8585/api",
|
||||||
|
"auth_provider_type": "no-auth"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
def metadata_ingestion_workflow():
|
def metadata_ingestion_workflow():
|
||||||
config_file = pathlib.Path("/ingestion/pipelines/metadata_to_es.json")
|
workflow_config = json.loads(config)
|
||||||
workflow_config = load_config_file(config_file)
|
|
||||||
|
|
||||||
workflow = Workflow.create(workflow_config)
|
workflow = Workflow.create(workflow_config)
|
||||||
workflow.execute()
|
workflow.execute()
|
||||||
workflow.raise_from_status()
|
workflow.raise_from_status()
|
||||||
@ -52,7 +77,7 @@ def metadata_ingestion_workflow():
|
|||||||
|
|
||||||
|
|
||||||
with DAG(
|
with DAG(
|
||||||
"elasticsearch",
|
"index_metadata",
|
||||||
default_args=default_args,
|
default_args=default_args,
|
||||||
description="An example DAG which runs a OpenMetadata ingestion workflow",
|
description="An example DAG which runs a OpenMetadata ingestion workflow",
|
||||||
start_date=days_ago(1),
|
start_date=days_ago(1),
|
||||||
|
@ -12,18 +12,21 @@
|
|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
import json
|
||||||
import pathlib
|
import pathlib
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
|
|
||||||
from airflow import DAG
|
from airflow import DAG
|
||||||
import json
|
|
||||||
try:
|
try:
|
||||||
from airflow.operators.python import PythonOperator
|
from airflow.operators.python import PythonOperator
|
||||||
except ModuleNotFoundError:
|
except ModuleNotFoundError:
|
||||||
from airflow.operators.python_operator import PythonOperator
|
from airflow.operators.python_operator import PythonOperator
|
||||||
|
|
||||||
|
from airflow.utils.dates import days_ago
|
||||||
|
|
||||||
from metadata.config.common import load_config_file
|
from metadata.config.common import load_config_file
|
||||||
from metadata.ingestion.api.workflow import Workflow
|
from metadata.ingestion.api.workflow import Workflow
|
||||||
from airflow.utils.dates import days_ago
|
|
||||||
|
|
||||||
default_args = {
|
default_args = {
|
||||||
"owner": "user_name",
|
"owner": "user_name",
|
||||||
@ -31,7 +34,7 @@ default_args = {
|
|||||||
"email_on_failure": False,
|
"email_on_failure": False,
|
||||||
"retries": 3,
|
"retries": 3,
|
||||||
"retry_delay": timedelta(minutes=5),
|
"retry_delay": timedelta(minutes=5),
|
||||||
"execution_timeout": timedelta(minutes=60)
|
"execution_timeout": timedelta(minutes=60),
|
||||||
}
|
}
|
||||||
|
|
||||||
config = """
|
config = """
|
||||||
@ -72,9 +75,9 @@ config = """
|
|||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
def metadata_ingestion_workflow():
|
def metadata_ingestion_workflow():
|
||||||
workflow_config = json.loads(config)
|
workflow_config = json.loads(config)
|
||||||
|
|
||||||
workflow = Workflow.create(workflow_config)
|
workflow = Workflow.create(workflow_config)
|
||||||
workflow.execute()
|
workflow.execute()
|
||||||
workflow.raise_from_status()
|
workflow.raise_from_status()
|
||||||
@ -94,4 +97,4 @@ with DAG(
|
|||||||
ingest_task = PythonOperator(
|
ingest_task = PythonOperator(
|
||||||
task_id="ingest_using_recipe",
|
task_id="ingest_using_recipe",
|
||||||
python_callable=metadata_ingestion_workflow,
|
python_callable=metadata_ingestion_workflow,
|
||||||
)
|
)
|
||||||
|
@ -17,10 +17,6 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
while ! wget -O /dev/null -o /dev/null mysql:3306; do sleep 5; done
|
while ! wget -O /dev/null -o /dev/null mysql:3306; do sleep 5; done
|
||||||
export AIRFLOW_HOME=/airflow
|
|
||||||
airflow db init
|
|
||||||
echo "AUTH_ROLE_PUBLIC = 'Admin'" >> /airflow/webserver_config.py
|
|
||||||
mv /ingestion/examples/airflow/airflow.cfg /airflow/airflow.cfg
|
|
||||||
airflow users create \
|
airflow users create \
|
||||||
--username admin \
|
--username admin \
|
||||||
--firstname Peter \
|
--firstname Peter \
|
||||||
@ -28,11 +24,5 @@ airflow users create \
|
|||||||
--role Admin \
|
--role Admin \
|
||||||
--email spiderman@superhero.org \
|
--email spiderman@superhero.org \
|
||||||
--password admin
|
--password admin
|
||||||
airflow webserver --port 8080 -D &
|
airflow db upgrade
|
||||||
(sleep 5; airflow db init)
|
airflow standalone
|
||||||
(sleep 5; airflow db init)
|
|
||||||
(sleep 5; curl -u admin:admin --data '{"dag_run_id":"sample_data"}' -H "Content-type: application/json" -X POST http://localhost:8080/api/v1/dags/sample_data/dagRuns) &
|
|
||||||
(sleep 7; curl -u admin:admin --data '{"dag_run_id":"sample_users"}' -H "Content-type: application/json" -X POST http://localhost:8080/api/v1/dags/sample_users/dagRuns) &
|
|
||||||
(sleep 12; curl -u admin:admin --data '{"dag_run_id":"sample_usage"}' -H "Content-type: application/json" -X POST http://localhost:8080/api/v1/dags/sample_usage/dagRuns) &
|
|
||||||
(sleep 17; curl -u admin:admin --data '{"dag_run_id":"elasticsearch"}' -H "Content-type: application/json" -X POST http://localhost:8080/api/v1/dags/elasticsearch/dagRuns) &
|
|
||||||
airflow scheduler
|
|
Loading…
x
Reference in New Issue
Block a user