feat(k8s): generalizes CronJob metadata ingestion resource for custom logic (#2467)

This commit is contained in:
Pedro Silva 2021-05-14 03:57:55 +01:00 committed by GitHub
parent 5ab1cbbbb2
commit e2d8a93cf0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 51 additions and 36 deletions

View File

@ -14,7 +14,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
version: 0.2.1
version: 0.2.2
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application.

View File

@ -11,10 +11,12 @@ A Helm chart for datahub's metadata-ingestion framework with kerberos authentica
| image.tag | string | `"latest"` | DataHub Ingestion image tag |
| imagePullSecrets | array | `[]` (does not add image pull secrets to deployed pods) | Docker registry secret names as an array |
| labels | string | `{}` | Metadata labels to be added to each crawling cron job |
| crons | array | `[]` | A list of crawling parameters, one entry per technology being crawled |
| crons.name | string | `crawler` | Name of the crawler container |
| crons.schedule | string | `"0 0 * * *"` | Cron expression (daily at midnight) for crawler jobs |
| crons.crawlerConfigPath | string | N/A | Path to metadata configuration file. This must be explicitly defined as a mount and is **required**. |
| crons | object | `{}` | A map of crawling parameters, one entry per technology being crawled. The key of each entry is used as the name of the new cron job |
| crons.schedule | string | `"0 0 * * *"` | Cron expression (default is daily at midnight) for crawler jobs |
| crons.recipe | object | `{}` | Recipe configuration to be executed (required) |
| crons.recipe.configmapName | string | `""` | Name of configmap to be mounted containing recipe to be executed |
| crons.recipe.fileName | string | `""` | Name of the property within the configMap referenced by `crons.recipe.configmapName` that holds the concrete recipe definition |
| crons.command | array | `["/bin/sh", "-c", "datahub ingest -c /etc/recipe/<crons.recipe.fileName>"]` | Array of strings denoting the crawling command to be invoked in the cron job. By default it will execute the recipe defined in the `crons.recipe` object. Cron crawling customization is possible by having extra volumes with custom logic to be executed. |
| crons.hostAliases | array | `[]` | host aliases |
| crons.env | object | `{}` | Environment variables to add to the cronjob container |
| crons.envFromSecrets | object | `{}` | Environment variables from secrets to the cronjob container |

View File

@ -1,10 +1,11 @@
{{- $baseName := include "datahub-ingestion-cron.fullname" .}}
{{- $labels := include "datahub-ingestion-cron.labels" .}}
{{- range $job, $val := .Values.crons }}
{{- range $jobName, $val := .Values.crons }}
{{- $defaultCommand := printf "datahub ingest -c /etc/recipe/%s" $val.recipe.fileName }}
apiVersion: batch/v1beta1
kind: CronJob
metadata:
name: "{{ $baseName }}-{{ .name }}"
name: "{{ $baseName }}-{{ $jobName }}"
labels: {{- $labels | nindent 4 }}
spec:
schedule: {{ default "0 0 * * *" .schedule | quote}}
@ -24,17 +25,16 @@ spec:
hostAliases: {{- include "common.tplvalues.render" (dict "value" .hostAliases "context" $) | nindent 10 }}
{{- end }}
containers:
- name: {{ default "crawler" .name }}
- name: {{ $jobName }}-crawler
image: "{{ $.Values.image.repository }}:{{ $.Values.image.tag }}"
imagePullPolicy: {{ $.Values.image.pullPolicy }}
{{- if .extraVolumeMounts }}
volumeMounts:
- name: recipe
mountPath: /etc/recipe
{{- toYaml .extraVolumeMounts | nindent 14 }}
{{- end }}
command:
- /bin/sh
- -c
- datahub ingest -c {{ required "Path to configuration file is required" .crawlerConfigPath }}
command: ["/bin/sh", "-c", {{ default $defaultCommand .command }} ]
env:
{{- if .env }}
{{- range $key,$value := .env }}
@ -52,8 +52,11 @@ spec:
{{- end }}
{{- end }}
restartPolicy: OnFailure
{{- if .extraVolumes }}
volumes:
- name: recipe
configMap:
name: {{ required "A valid .recipe.configmapName entry is required!" $val.recipe.configmapName }}
{{- if .extraVolumes }}
{{- toYaml .extraVolumes | nindent 12 }}
{{- end }}
---

View File

@ -9,34 +9,44 @@ image:
imagePullSecrets: []
crons: []
crons: {}
#### Example data
## Metadata ingestion name
##
#name: "crawler"
#hive:
## Daily at midnight (we may want to offset this to not conflict with other processes)
#schedule: "0 0 * * *"
## Daily at midnight (we may want to offset this to not conflict with other processes)
#schedule: "0 0 * * *"
#recipe:
# configmapName:
# fileName:
## Deployment pod host aliases
## https://kubernetes.io/docs/concepts/services-networking/add-entries-to-pod-etc-hosts-with-host-aliases/
##
#hostAliases: []
## Command to be executed
#command: ["/bin/sh", "-c", "datahub ingest -c /etc/recipe/<recipe.fileName>"]
## Environment variables.
#env: {}
## Deployment pod host aliases
## https://kubernetes.io/docs/concepts/services-networking/add-entries-to-pod-etc-hosts-with-host-aliases/
##
#hostAliases: []
## Environment variables from Secret resources.
#envFromSecrets: {}
## Environment variables.
#env: {}
## Additional primary volume mounts
##
#extraVolumeMounts: []
## Environment variables from Secret resources.
#envFromSecrets: {}
## Additional primary volumes
##
#extraVolumes: []
## Additional primary volume mounts
##
#extraVolumeMounts:
#- name: configmap-volume
# mountPath: config.yml
# subPath: config.yml
## Add your own init container or uncomment and modify the given example.
##
#extraInitContainers: {}
## Additional primary volumes
##
#extraVolumes:
#- name: configmap-volume
# configMap:
# name: crawler-config
## Add your own init container or uncomment and modify the given example.
##
#extraInitContainers: {}