mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-10-31 10:49:00 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			182 lines
		
	
	
		
			8.8 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			182 lines
		
	
	
		
			8.8 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
| #!/bin/sh
 | |
| 
 | |
| set -e
 | |
| 
 | |
| : ${DATAHUB_ANALYTICS_ENABLED:=true}
 | |
| : ${USE_AWS_ELASTICSEARCH:=false}
 | |
| 
 | |
| if [[ $ELASTICSEARCH_USE_SSL == true ]]; then
 | |
|     ELASTICSEARCH_PROTOCOL=https
 | |
| else
 | |
|     ELASTICSEARCH_PROTOCOL=http
 | |
| fi
 | |
| 
 | |
| if [[ -z $ELASTICSEARCH_USERNAME ]]; then
 | |
|     ELASTICSEARCH_HOST_URL=$ELASTICSEARCH_HOST
 | |
| else
 | |
|     ELASTICSEARCH_HOST_URL=$ELASTICSEARCH_USERNAME:$ELASTICSEARCH_PASSWORD@$ELASTICSEARCH_HOST
 | |
| fi
 | |
| 
 | |
| function get_index_name() {
 | |
|   if [[ -z "$INDEX_PREFIX" ]]; then
 | |
|     echo $1
 | |
|   else
 | |
|     echo "${INDEX_PREFIX}_$1"
 | |
|   fi
 | |
| }
 | |
| 
 | |
| function generate_index_file() {
 | |
|   jq -n \
 | |
|     --slurpfile settings "$1" \
 | |
|     --slurpfile mappings "$2" \
 | |
|     '.settings=$settings[0] | .mappings=$mappings[0]' > "$3"
 | |
| }
 | |
| 
 | |
| function check_reindex() {
 | |
|     initial_documents=$(curl -XGET "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/$1/_count" -H 'Content-Type: application/json' | jq '.count')
 | |
|     for i in $(seq 30); do
 | |
|       echo $i
 | |
|       reindexed_documents=$(curl -XGET "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/$2/_count" -H 'Content-Type: application/json' | jq '.count')
 | |
|       if [[ $reindexed_documents == "$initial_documents" ]]; then
 | |
|         echo -e "\nPost-reindex document reconcialiation completed. doc_source_index_count: $initial_documents; doc_target_index_count: $reindexed_documents"
 | |
|         return 0
 | |
|       else
 | |
|         sleep 3
 | |
|       fi
 | |
|     done
 | |
| 
 | |
|     echo -e "\nPost-reindex document reconcialiation failed. doc_source_index_count: $initial_documents; doc_target_index_count: $reindexed_documents"
 | |
|     return 1
 | |
| }
 | |
| 
 | |
| function reindex() {
 | |
|   source_index=$1
 | |
|   target_index="$1_$(date +%s)"
 | |
| 
 | |
|   #create target index with latest index config
 | |
|   curl -XPUT "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/$target_index" -H 'Content-Type: application/json' --data @/tmp/data
 | |
| 
 | |
|   #reindex the documents in source index to target index.
 | |
|   # One of the assumption here is that we only add properties to document when index-config is evolved.
 | |
|   # In case a property is deleted from document, it will still be reindexed in target index as default behaviour and
 | |
|   # it is not breaking the code. If still needs to be purged from target index, use "removed" property in POST data.
 | |
|   curl -XPOST "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/_reindex?pretty" -H 'Content-Type: application/json' \
 | |
|     -d "{\"source\":{\"index\":\"$source_index\"},\"dest\":{\"index\":\"$target_index\"}}"
 | |
| 
 | |
|   if check_reindex "$source_index" "$target_index"
 | |
|   then
 | |
|     #checking if source index is concrete index or alias
 | |
|     if [ $(curl -o /dev/null -s -w "%{http_code}" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/_alias/$source_index") -eq 404 ]
 | |
|     then
 | |
|       curl -XDELETE "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/$source_index"
 | |
|     else
 | |
|       concrete_index_name=$(curl -XGET "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/_alias/$source_index" | jq 'keys[]' | head -1 | tr -d \")
 | |
|       curl -XDELETE "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/$concrete_index_name"
 | |
|     fi
 | |
| 
 | |
|     curl -XPOST "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/_aliases" -H 'Content-Type: application/json' \
 | |
|       -d "{\"actions\":[{\"remove\":{\"index\":\"*\",\"alias\":\"$source_index\"}},{\"add\":{\"index\":\"$target_index\",\"alias\":\"$source_index\"}}]}"
 | |
| 
 | |
|     echo -e "\nReindexing to $target_index succeded"
 | |
|     return 0
 | |
|   else
 | |
|     curl -XDELETE "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/$target_index"
 | |
|     echo -e "\nReindexing to $target_index failed"
 | |
|     return 1
 | |
|   fi
 | |
| }
 | |
| 
 | |
| function create_index() {
 | |
|   generate_index_file "index/$2" "index/$3" /tmp/data
 | |
| 
 | |
|   #checking if index(or alias) exists
 | |
|   if [ $(curl -o /dev/null -s -w "%{http_code}" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/$1") -eq 404 ]
 | |
|   then
 | |
|     echo -e '\ncreating index' "$1"
 | |
| 
 | |
|     curl -XPUT "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/$1" -H 'Content-Type: application/json' --data @/tmp/data
 | |
|     return 0
 | |
|   else
 | |
|     echo -e '\ncomparing with existing version of index' "$1"
 | |
| 
 | |
|     setting_keys_regex=$(jq '.index | keys[]' "index/$2" | xargs | sed 's/ /|/g')
 | |
|     curl -XGET "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/$1/_settings" | \
 | |
|       jq '.. | .settings? | select(. != null)' | \
 | |
|       jq --arg KEYS_REGEX "$setting_keys_regex" '.index | with_entries(select(.key | match($KEYS_REGEX))) | {"index":.}' \
 | |
|       > /tmp/existing_setting
 | |
| 
 | |
|     curl -XGET "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/$1/_mapping" | \
 | |
|       jq '.. | .mappings? | select(. != null)' \
 | |
|       > /tmp/existing_mapping
 | |
| 
 | |
|     generate_index_file /tmp/existing_setting /tmp/existing_mapping /tmp/existing
 | |
| 
 | |
|     jq -S . /tmp/existing > /tmp/existing_sorted
 | |
|     jq -S . /tmp/data > /tmp/data_sorted
 | |
|     if diff /tmp/existing_sorted /tmp/data_sorted
 | |
|     then
 | |
|       echo -e "\nno changes to index $1 mappings and settings"
 | |
|       return 0
 | |
|     else
 | |
|       echo -e "\nupdating index" "$1"
 | |
| 
 | |
|       reindex "$1" && return 0 || return 1
 | |
|     fi
 | |
|   fi
 | |
| }
 | |
| 
 | |
| function create_datahub_usage_event_datastream() {
 | |
|   if [ $(curl -o /dev/null -s -w "%{http_code}" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/_ilm/policy/datahub_usage_event_policy") -eq 404 ]
 | |
|   then
 | |
|     echo -e "\ncreating datahub_usage_event_policy"
 | |
|     curl -XPUT "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/_ilm/policy/datahub_usage_event_policy" -H 'Content-Type: application/json' --data @/index/usage-event/policy.json
 | |
|   else
 | |
|     echo -e "\ndatahub_usage_event_policy exists"
 | |
|   fi
 | |
|   if [ $(curl -o /dev/null -s -w "%{http_code}" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/_index_template/datahub_usage_event_index_template") -eq 404 ]
 | |
|   then
 | |
|     echo -e "\ncreating datahub_usage_event_index_template"
 | |
|     curl -XPUT "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/_index_template/datahub_usage_event_index_template" -H 'Content-Type: application/json' --data @/index/usage-event/index_template.json
 | |
|   else
 | |
|     echo -e "\ndatahub_usage_event_index_template exists"
 | |
|   fi
 | |
| }
 | |
| 
 | |
| function create_datahub_usage_event_aws_elasticsearch() {
 | |
|   if [ $(curl -o /dev/null -s -w "%{http_code}" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/_opendistro/_ism/policies/datahub_usage_event_policy") -eq 404 ]
 | |
|   then
 | |
|     echo -e "\ncreating datahub_usage_event_policy"
 | |
|     curl -XPUT "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/_opendistro/_ism/policies/datahub_usage_event_policy" -H 'Content-Type: application/json' --data @/index/usage-event/aws_es_ism_policy.json
 | |
|   else
 | |
|     echo -e "\ndatahub_usage_event_policy exists"
 | |
|   fi
 | |
|   if [ $(curl -o /dev/null -s -w "%{http_code}" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/_template/datahub_usage_event_index_template") -eq 404 ]
 | |
|   then
 | |
|     echo -e "\ncreating datahub_usage_event_index_template"
 | |
|     curl -XPUT "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/_template/datahub_usage_event_index_template" -H 'Content-Type: application/json' --data @/index/usage-event/aws_es_index_template.json
 | |
|     curl -XPUT "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT/datahub_usage_event-000001"  -H 'Content-Type: application/json' --data "{\"aliases\":{\"datahub_usage_event\":{\"is_write_index\":true}}}"
 | |
|   else
 | |
|     echo -e "\ndatahub_usage_event_index_template exists"
 | |
|   fi
 | |
| }
 | |
| 
 | |
| create_index $(get_index_name chartdocument) chart/settings.json chart/mappings.json || exit 1
 | |
| create_index $(get_index_name corpuserinfodocument) corp-user/settings.json corp-user/mappings.json  || exit 1
 | |
| create_index $(get_index_name dashboarddocument) dashboard/settings.json dashboard/mappings.json  || exit 1
 | |
| create_index $(get_index_name datajobdocument) datajob/settings.json datajob/mappings.json || exit 1
 | |
| create_index $(get_index_name dataflowdocument) dataflow/settings.json dataflow/mappings.json || exit 1
 | |
| create_index $(get_index_name dataprocessdocument) data-process/settings.json data-process/mappings.json || exit 1
 | |
| create_index $(get_index_name datasetdocument) dataset/settings.json dataset/mappings.json || exit 1
 | |
| create_index $(get_index_name mlmodeldocument) ml-model/settings.json ml-model/mappings.json || exit 1
 | |
| create_index $(get_index_name tagdocument) tags/settings.json tags/mappings.json || exit 1
 | |
| create_index $(get_index_name glossaryterminfodocument) glossary/term/settings.json glossary/term/mappings.json || exit 1
 | |
| create_index $(get_index_name glossarynodeinfodocument) glossary/node/settings.json glossary/node/mappings.json || exit 1
 | |
| if [[ $DATAHUB_ANALYTICS_ENABLED == true ]]; then
 | |
|   if [[ $USE_AWS_ELASTICSEARCH == false ]]; then
 | |
|     create_datahub_usage_event_datastream || exit 1
 | |
|   else
 | |
|     create_datahub_usage_event_aws_elasticsearch || exit 1
 | |
|   fi
 | |
| fi
 | |
| 
 | 
