mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-26 01:18:20 +00:00
feat(k8s): generalizes CronJob metadata ingestion resource for custom logic (#2467)
This commit is contained in:
parent
5ab1cbbbb2
commit
e2d8a93cf0
@ -14,7 +14,7 @@ type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
version: 0.2.1
|
||||
version: 0.2.2
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application.
|
||||
|
||||
@ -11,10 +11,12 @@ A Helm chart for datahub's metadata-ingestion framework with kerberos authentica
|
||||
| image.tag | string | `"latest"` | DataHub Ingestion image tag |
|
||||
| imagePullSecrets | array | `[]` (does not add image pull secrets to deployed pods) | Docker registry secret names as an array |
|
||||
| labels | object | `{}` | Metadata labels to be added to each crawling cron job |
|
||||
| crons | type | `[]` | A list of crawling parameters per different technology being crawler |
|
||||
| crons.name | string | `crawler` | Name of the crawler container |
|
||||
| crons.schedule | string | `""0 0 * * *"` | Cron expression (daily at midnight) for crawler jobs |
|
||||
| crons.crawlerConfigPath | string | N/A | Path to metadata configuration file. This must explicitly defined as a mount and is **required**. |
|
||||
| crons | object | `{}` | A map of crawling parameters, one entry per technology being crawled. Each key in the map is used as the name of the corresponding cron job |
|
||||
| crons.schedule | string | `"0 0 * * *"` | Cron expression (default is daily at midnight) for crawler jobs |
|
||||
| crons.recipe | object | `{}` | Recipe configuration to be executed (required) |
|
||||
| crons.recipe.configmapName | string | `""` | Name of configmap to be mounted containing recipe to be executed |
|
||||
| crons.recipe.fileName | string | `""` | Name of the property within the configMap referenced by `crons.recipe.configmapName` that holds the concrete recipe definition |
|
||||
| crons.command | array | `["/bin/sh", "-c", "datahub ingest -c /etc/recipe/<crons.recipe.fileName>"]` | Array of strings denoting the crawling command to be invoked in the cron job. By default it will execute the recipe defined in the `crons.recipe` object. Cron crawling customization is possible by having extra volumes with custom logic to be executed. |
|
||||
| crons.hostAliases | array | `[]` | host aliases |
|
||||
| crons.env | object | `{}` | Environment variables to add to the cronjob container |
|
||||
| crons.envFromSecrets | object | `{}` | Environment variables sourced from secrets to add to the cronjob container |
|
||||
|
||||
@ -1,10 +1,11 @@
|
||||
{{- $baseName := include "datahub-ingestion-cron.fullname" .}}
|
||||
{{- $labels := include "datahub-ingestion-cron.labels" .}}
|
||||
{{- range $job, $val := .Values.crons }}
|
||||
{{- range $jobName, $val := .Values.crons }}
|
||||
{{- $defaultCommand := printf "datahub ingest -c /etc/recipe/%s" $val.recipe.fileName }}
|
||||
apiVersion: batch/v1beta1
|
||||
kind: CronJob
|
||||
metadata:
|
||||
name: "{{ $baseName }}-{{ .name }}"
|
||||
name: "{{ $baseName }}-{{ $jobName }}"
|
||||
labels: {{- $labels | nindent 4 }}
|
||||
spec:
|
||||
schedule: {{ default "0 0 * * *" .schedule | quote}}
|
||||
@ -24,17 +25,16 @@ spec:
|
||||
hostAliases: {{- include "common.tplvalues.render" (dict "value" .hostAliases "context" $) | nindent 10 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: {{ default "crawler" .name }}
|
||||
- name: {{ $jobName }}-crawler
|
||||
image: "{{ $.Values.image.repository }}:{{ $.Values.image.tag }}"
|
||||
imagePullPolicy: {{ $.Values.image.pullPolicy }}
|
||||
{{- if .extraVolumeMounts }}
|
||||
volumeMounts:
|
||||
- name: recipe
|
||||
mountPath: /etc/recipe
|
||||
{{- toYaml .extraVolumeMounts | nindent 14 }}
|
||||
{{- end }}
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- datahub ingest -c {{ required "Path to configuration file is required" .crawlerConfigPath }}
|
||||
command: ["/bin/sh", "-c", {{ default $defaultCommand .command }} ]
|
||||
env:
|
||||
{{- if .env }}
|
||||
{{- range $key,$value := .env }}
|
||||
@ -52,8 +52,11 @@ spec:
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
restartPolicy: OnFailure
|
||||
{{- if .extraVolumes }}
|
||||
volumes:
|
||||
- name: recipe
|
||||
configMap:
|
||||
name: {{ required "A valid .recipe.configmapName entry is required!" $val.recipe.configmapName }}
|
||||
{{- if .extraVolumes }}
|
||||
{{- toYaml .extraVolumes | nindent 12 }}
|
||||
{{- end }}
|
||||
---
|
||||
|
||||
@ -9,34 +9,44 @@ image:
|
||||
|
||||
imagePullSecrets: []
|
||||
|
||||
crons: []
|
||||
crons: {}
|
||||
#### Example data
|
||||
## Metadata ingestion name
|
||||
##
|
||||
#name: "crawler"
|
||||
#hive:
|
||||
## Daily at midnight (we may want to offset this to not conflict with other processes)
|
||||
#schedule: "0 0 * * *"
|
||||
|
||||
## Daily at midnight (we may want to offset this to not conflict with other processes)
|
||||
#schedule: "0 0 * * *"
|
||||
#recipe:
|
||||
# configmapName:
|
||||
# fileName:
|
||||
|
||||
## Deployment pod host aliases
|
||||
## https://kubernetes.io/docs/concepts/services-networking/add-entries-to-pod-etc-hosts-with-host-aliases/
|
||||
##
|
||||
#hostAliases: []
|
||||
## Command to be executed
|
||||
#command: ["/bin/sh", "-c", "datahub ingest -c /etc/recipe/<recipe.fileName>"]
|
||||
|
||||
## Environment variables.
|
||||
#env: {}
|
||||
## Deployment pod host aliases
|
||||
## https://kubernetes.io/docs/concepts/services-networking/add-entries-to-pod-etc-hosts-with-host-aliases/
|
||||
##
|
||||
#hostAliases: []
|
||||
|
||||
## Environment variables from Secret resources.
|
||||
#envFromSecrets: {}
|
||||
## Environment variables.
|
||||
#env: {}
|
||||
|
||||
## Additional primary volume mounts
|
||||
##
|
||||
#extraVolumeMounts: []
|
||||
## Environment variables from Secret resources.
|
||||
#envFromSecrets: {}
|
||||
|
||||
## Additional primary volumes
|
||||
##
|
||||
#extraVolumes: []
|
||||
## Additional primary volume mounts
|
||||
##
|
||||
#extraVolumeMounts:
|
||||
#- name: configmap-volume
|
||||
# mountPath: config.yml
|
||||
# subPath: config.yml
|
||||
|
||||
## Add your own init container or uncomment and modify the given example.
|
||||
##
|
||||
#extraInitContainers: {}
|
||||
## Additional primary volumes
|
||||
##
|
||||
#extraVolumes:
|
||||
#- name: configmap-volume
|
||||
# configMap:
|
||||
# name: crawler-config
|
||||
|
||||
## Add your own init container or uncomment and modify the given example.
|
||||
##
|
||||
#extraInitContainers: {}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user