diff --git a/contrib/kubernetes/README.md b/contrib/kubernetes/README.md
new file mode 100644
index 0000000000..3df35d263d
--- /dev/null
+++ b/contrib/kubernetes/README.md
@@ -0,0 +1,40 @@
+# Kubernetes Setup for DataHub
+
+## Introduction
+This directory provides the Kubernetes setup for DataHub. This first version consists of simple YAML files.
+The next version will contain a DataHub [Helm](https://helm.sh/) chart that can be published to [Helm Hub](https://hub.helm.sh/).
+
+## Setup
+This Kubernetes deployment does not include the artifacts listed below. The idea is to deploy each of them separately using its original Helm chart.
+
+* Kafka and Schema Registry [Chart Link](https://github.com/confluentinc/cp-helm-charts/tree/master/charts/cp-kafka)
+* Elasticsearch [Chart Link](https://hub.helm.sh/charts/elastic/elasticsearch)
+* MySQL [Chart Link](https://hub.helm.sh/charts/stable/mysql)
+* Neo4j [Chart Link](https://hub.helm.sh/charts/stable/neo4j)
+
+Also, these can be installed on-prem or leveraged as a managed service on any cloud platform.
+
+## Quickstart
+1. Install Docker and Kubernetes.
+2. Update the values in the configmap (datahub-configmap.yaml) with the Docker hostname. For example:
+```
+ebean.datasource.host: "192.168.0.104:3306"
+ebean.datasource.url: "jdbc:mysql://192.168.0.104:3306/datahub?verifyServerCertificate=false&useSSL=true"
+kafka.bootstrap.server: "192.168.0.104:29092"
+kafka.schemaregistry.url: "http://192.168.0.104:8081"
+elasticsearch.host: "192.168.0.104"
+neo4j.uri: "bolt://192.168.0.104"
+```
+3. Create the configmap by running the following command:
+```
+kubectl apply -f datahub-configmap.yaml
+```
+4. Run the kubectl command below:
+```
+cd .. && kubectl apply -f kubernetes/
+```
+Please note that these steps will be updated once this setup is turned into a Helm chart.
+
+
+## Testing
+For testing this setup, we can use the existing quickstart's [docker-compose](https://github.com/linkedin/datahub/blob/master/docker/quickstart/docker-compose.yml) file but commenting out `data-hub-gms`, `datahub-frontend`, `datahub-mce-consumer` & `datahub-mae-consumer` sections.
diff --git a/contrib/kubernetes/datahub-configmap.yaml b/contrib/kubernetes/datahub-configmap.yaml
new file mode 100644
index 0000000000..2b4900edee
--- /dev/null
+++ b/contrib/kubernetes/datahub-configmap.yaml
@@ -0,0 +1,32 @@
+---
+# Shared, non-secret settings consumed by the DataHub deployments in this
+# directory via configMapKeyRef.
+# The hostnames below are placeholders: replace each with the real service
+# hostname, or with the Docker hostname if the rest of the services are started
+# using docker-compose.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: datahub-configuration
+  namespace: default
+data:
+  # DataHub application settings.
+  datahub.app.version: '1.0'
+  datahub.play.mem.buffer.size: '10MB'
+  # GMS endpoint; the host is the name of the Service declared in
+  # datahub-gms-deployment.yaml.
+  datahub.gms.host: 'datahub-gms-service'
+  datahub.gms.port: '8080'
+  # MySQL: replace "mysql" in both values with the MySQL hostname.
+  ebean.datasource.host: 'mysql:3306'
+  ebean.datasource.url: 'jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true'
+  ebean.datasource.driver: 'com.mysql.jdbc.Driver'
+  # Kafka: replace "broker" with the Kafka broker hostname.
+  kafka.bootstrap.server: 'broker:29092'
+  # Schema Registry: replace "schema-registry" with the schema-registry hostname.
+  kafka.schemaregistry.url: 'http://schema-registry:8081'
+  # Elasticsearch: replace "elasticsearch" with the Elasticsearch hostname.
+  elasticsearch.host: 'elasticsearch'
+  elasticsearch.port: '9200'
+  # Neo4j: replace "neo4j" with the Neo4j hostname.
+  neo4j.uri: 'bolt://neo4j'
diff --git a/contrib/kubernetes/datahub-frontend-deployment.yaml b/contrib/kubernetes/datahub-frontend-deployment.yaml
new file mode 100644
index 0000000000..1559eeffa5
--- /dev/null
+++ b/contrib/kubernetes/datahub-frontend-deployment.yaml
@@ -0,0 +1,66 @@
+---
+# Deployment running a single replica of the DataHub frontend container.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: datahub-frontend-deployment
+  labels:
+    app: datahub-frontend
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: datahub-frontend
+  template:
+    metadata:
+      labels:
+        app: datahub-frontend
+    spec:
+      containers:
+        - name: datahub-frontend
+          image: keremsahin/datahub-frontend:latest
+          ports:
+            - containerPort: 9001
+          env:
+            # GMS endpoint, taken from the datahub-configuration ConfigMap.
+            - name: DATAHUB_GMS_HOST
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: datahub.gms.host
+            - name: DATAHUB_GMS_PORT
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: datahub.gms.port
+            # Secret value, taken from the datahub-secret Secret.
+            - name: DATAHUB_SECRET
+              valueFrom:
+                secretKeyRef:
+                  name: datahub-secret
+                  key: datahub.gms.secret
+            # Application settings, taken from the ConfigMap.
+            - name: DATAHUB_APP_VERSION
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: datahub.app.version
+            - name: DATAHUB_PLAY_MEM_BUFFER_SIZE
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: datahub.play.mem.buffer.size
+---
+# LoadBalancer Service exposing port 9001 of the frontend pods.
+apiVersion: v1
+kind: Service
+metadata:
+  name: datahub-frontend-service
+  labels:
+    app: datahub-frontend
+spec:
+  ports:
+    - port: 9001
+  selector:
+    app: datahub-frontend
+  type: LoadBalancer
diff --git a/contrib/kubernetes/datahub-gms-deployment.yaml b/contrib/kubernetes/datahub-gms-deployment.yaml
new file mode 100644
index 0000000000..8cf5ef6913
--- /dev/null
+++ b/contrib/kubernetes/datahub-gms-deployment.yaml
@@ -0,0 +1,108 @@
+---
+# Deployment running a single replica of the DataHub GMS container.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: datahub-gms-deployment
+  labels:
+    app: datahub-gms
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: datahub-gms
+  template:
+    metadata:
+      labels:
+        app: datahub-gms
+    spec:
+      containers:
+        - name: datahub-gms
+          image: keremsahin/datahub-gms:latest
+          ports:
+            - containerPort: 8080
+          # Starts gms.war with the Jetty runner jar, through bash.
+          command:
+            - '/bin/bash'
+            - '-c'
+            - 'java -jar jetty-runner-9.4.20.v20190813.jar gms.war'
+          env:
+            # Datasource credentials, taken from the datahub-secret Secret.
+            - name: EBEAN_DATASOURCE_USERNAME
+              valueFrom:
+                secretKeyRef:
+                  name: datahub-secret
+                  key: ebean.datasource.username
+            - name: EBEAN_DATASOURCE_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: datahub-secret
+                  key: ebean.datasource.password
+            # Datasource connection settings, taken from the ConfigMap.
+            - name: EBEAN_DATASOURCE_HOST
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: ebean.datasource.host
+            - name: EBEAN_DATASOURCE_URL
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: ebean.datasource.url
+            - name: EBEAN_DATASOURCE_DRIVER
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: ebean.datasource.driver
+            # Kafka and Schema Registry endpoints, taken from the ConfigMap.
+            - name: KAFKA_BOOTSTRAP_SERVER
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: kafka.bootstrap.server
+            - name: KAFKA_SCHEMAREGISTRY_URL
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: kafka.schemaregistry.url
+            # Elasticsearch endpoint, taken from the ConfigMap.
+            - name: ELASTICSEARCH_HOST
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: elasticsearch.host
+            - name: ELASTICSEARCH_PORT
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: elasticsearch.port
+            # Neo4j URI (ConfigMap) and credentials (Secret).
+            - name: NEO4J_URI
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: neo4j.uri
+            - name: NEO4J_USERNAME
+              valueFrom:
+                secretKeyRef:
+                  name: datahub-secret
+                  key: neo4j.username
+            - name: NEO4J_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: datahub-secret
+                  key: neo4j.password
+---
+# LoadBalancer Service exposing port 8080 of the GMS pods.
+apiVersion: v1
+kind: Service
+metadata:
+  name: datahub-gms-service
+  labels:
+    app: datahub-gms
+spec:
+  ports:
+    - port: 8080
+  selector:
+    app: datahub-gms
+  type: LoadBalancer
diff --git a/contrib/kubernetes/datahub-mae-consumer-deployment.yaml b/contrib/kubernetes/datahub-mae-consumer-deployment.yaml
new file mode 100644
index 0000000000..f7f1dff2a7
--- /dev/null
+++ b/contrib/kubernetes/datahub-mae-consumer-deployment.yaml
@@ -0,0 +1,65 @@
+---
+# Deployment running a single replica of the DataHub MAE consumer job.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: datahub-mae-consumer-deployment
+  labels:
+    app: datahub-mae-consumer
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: datahub-mae-consumer
+  template:
+    metadata:
+      labels:
+        app: datahub-mae-consumer
+    spec:
+      containers:
+        - name: datahub-mae-consumer
+          image: keremsahin/datahub-mae-consumer:latest
+          # Launches the mae-consumer-job start script through bash.
+          command:
+            - '/bin/bash'
+            - '-c'
+            - './mae-consumer-job/bin/mae-consumer-job'
+          env:
+            # Kafka and Schema Registry endpoints, taken from the ConfigMap.
+            - name: KAFKA_BOOTSTRAP_SERVER
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: kafka.bootstrap.server
+            - name: KAFKA_SCHEMAREGISTRY_URL
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: kafka.schemaregistry.url
+            # Elasticsearch endpoint, taken from the ConfigMap.
+            - name: ELASTICSEARCH_HOST
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: elasticsearch.host
+            - name: ELASTICSEARCH_PORT
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: elasticsearch.port
+            # Neo4j URI (ConfigMap) and credentials (Secret).
+            - name: NEO4J_URI
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: neo4j.uri
+            - name: NEO4J_USERNAME
+              valueFrom:
+                secretKeyRef:
+                  name: datahub-secret
+                  key: neo4j.username
+            - name: NEO4J_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: datahub-secret
+                  key: neo4j.password
diff --git a/contrib/kubernetes/datahub-mce-consumer-deployment.yaml b/contrib/kubernetes/datahub-mce-consumer-deployment.yaml
new file mode 100644
index 0000000000..137fcc0dfb
--- /dev/null
+++ b/contrib/kubernetes/datahub-mce-consumer-deployment.yaml
@@ -0,0 +1,49 @@
+---
+# Deployment running a single replica of the DataHub MCE consumer job.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: datahub-mce-consumer-deployment
+  labels:
+    app: datahub-mce-consumer
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: datahub-mce-consumer
+  template:
+    metadata:
+      labels:
+        app: datahub-mce-consumer
+    spec:
+      containers:
+        - name: datahub-mce-consumer
+          image: keremsahin/datahub-mce-consumer:latest
+          # Launches the mce-consumer-job start script through bash.
+          command:
+            - '/bin/bash'
+            - '-c'
+            - './mce-consumer-job/bin/mce-consumer-job'
+          env:
+            # Kafka and Schema Registry endpoints, taken from the ConfigMap.
+            - name: KAFKA_BOOTSTRAP_SERVER
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: kafka.bootstrap.server
+            - name: KAFKA_SCHEMAREGISTRY_URL
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: kafka.schemaregistry.url
+            # GMS endpoint, taken from the ConfigMap.
+            - name: DATAHUB_GMS_HOST
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: datahub.gms.host
+            - name: DATAHUB_GMS_PORT
+              valueFrom:
+                configMapKeyRef:
+                  name: datahub-configuration
+                  key: datahub.gms.port
diff --git a/contrib/kubernetes/datahub-secret.yaml b/contrib/kubernetes/datahub-secret.yaml
new file mode 100644
index 0000000000..248fce9c80
--- /dev/null
+++ b/contrib/kubernetes/datahub-secret.yaml
@@ -0,0 +1,18 @@
+---
+# Credentials referenced by the DataHub deployments via secretKeyRef.
+# NOTE(review): the values are stored here in plaintext and look like sample
+# defaults; confirm and replace them before using this outside a test setup.
+apiVersion: v1
+kind: Secret
+metadata:
+  name: datahub-secret
+type: Opaque
+stringData:
+  # Injected into the frontend container as DATAHUB_SECRET.
+  datahub.gms.secret: 'YouKnowNothing'
+  # Datasource credentials injected into GMS (EBEAN_DATASOURCE_*).
+  ebean.datasource.username: 'datahub'
+  ebean.datasource.password: 'datahub'
+  # Neo4j credentials injected into GMS and the MAE consumer (NEO4J_*).
+  neo4j.username: 'neo4j'
+  neo4j.password: 'datahub'