mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-11-04 12:51:23 +00:00 
			
		
		
		
	* Changes rollback behaviour to apply soft deletes by default. Summary: Addresses the feature request "Flag in delete command to only delete aspects touched by an ingestion run; add flag to nuke everything" by modifying the default behaviour of a rollback operation, which by default will no longer delete an entity if a keyAspect is being rolled back. Instead, the key aspect is kept and a StatusAspect is upserted with removed=true, effectively making a soft delete. Another PR will follow to perform garbage collection on these soft-deleted entities. To keep the old behaviour, a new parameter was added to the cli ingest rollback endpoint: --hard-delete. * Adds restli specs * Fixes deleteAspect endpoint & adds support for nested transactions * Enable regression test & fix docker-compose for local development * Add generated quickstart * Fix quickstart generation script * Adds missing env var to docker-compose-without-neo4j * Sets status removed=true when ingesting resources * Adds soft deletes for Elasticsearch + soft delete flags across ingestion sub-commands * Makes Elasticsearch consistent * Update tests with new behaviour * apply review comments * apply review comment * Forces Elasticsearch to add documents with status removed false when ingesting * Reset gradle properties to default * Fix tests
		
			
				
	
	
		
			129 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			129 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import os
 | 
						|
import click
 | 
						|
import yaml
 | 
						|
from collections.abc import Mapping
 | 
						|
from dotenv import dotenv_values
 | 
						|
from yaml import Loader
 | 
						|
from collections import OrderedDict
 | 
						|
 | 
						|
 | 
						|
# Generates a merged docker-compose file with env variables inlined.
 | 
						|
# Usage: python3 docker_compose_cli_gen.py ../docker-compose.yml ../docker-compose.override.yml ../docker-compose-gen.yml
 | 
						|
 | 
						|
# Services that modify_docker_config strips from the generated compose file.
omitted_services = ["kafka-rest-proxy", "kafka-topics-ui", "schema-registry-ui", "kibana"]

# Hard memory ceilings keyed by service name. A container that exceeds its
# ceiling is killed, so each service should be configured to use much less
# Java heap space than is allocated here.
mem_limits = dict(elasticsearch="1g")
 | 
						|
 | 
						|
 | 
						|
def dict_merge(dct, merge_dct):
    """Recursively merge ``merge_dct`` into ``dct`` in place.

    When a key exists on both sides, the existing value is a ``dict`` and the
    incoming value is a mapping, the two are merged recursively. In every
    other case the incoming value replaces (or creates) the entry in ``dct``.
    Nothing is returned; ``dct`` is mutated.
    """
    for key, incoming in merge_dct.items():
        both_nested = (
            key in dct
            and isinstance(dct[key], dict)
            and isinstance(incoming, Mapping)
        )
        if not both_nested:
            # Scalars, lists and type mismatches: the incoming value wins.
            dct[key] = incoming
            continue
        dict_merge(dct[key], incoming)
 | 
						|
 | 
						|
 | 
						|
def _rebase_relative_path(path):
    """Return ``path`` with one extra parent hop if it starts with ``./`` or ``../``."""
    if path.startswith("../"):
        return "../" + path
    if path.startswith("./"):
        return "." + path
    return path


def modify_docker_config(base_path, docker_yaml_config):
    """Rewrite a parsed docker-compose config in place for the generated file.

    Args:
        base_path: Directory of the compose file; ``env_file`` entries are
            resolved relative to it.
        docker_yaml_config: The parsed compose document (a dict). It is
            mutated; nothing is returned.

    For every service this drops omitted services, inlines ``env_file``
    values into ``environment``, removes ``build``, applies ``mem_limits``
    and re-bases relative volume paths. Finally the compose ``version`` is
    pinned to "2.3".
    """
    # A compose file (e.g. an override) may carry no services at all.
    services = docker_yaml_config.get("services") or {}

    # 0. Filter out services to be omitted.
    for key in list(services):
        if key in omitted_services:
            del services[key]

    for name, service in services.items():
        # 1. Extract (and remove) the env file pointer(s). Compose accepts
        #    either a single path or a list of paths.
        env_files = service.pop("env_file", None)

        if env_files is not None:
            if isinstance(env_files, str):
                env_files = [env_files]

            # 2. Create an "environment" block if it does not exist (or is empty).
            environment = service.get("environment")
            if environment is None:
                environment = service["environment"] = []

            for env_file in env_files:
                # 3. Construct the full .env path and resolve its values.
                env_vars = dotenv_values(os.path.join(base_path, env_file))

                # 4. Inline the values into the "environment" block.
                for key, value in env_vars.items():
                    if isinstance(environment, dict):
                        # Mapping syntax: explicit environment entries take
                        # precedence over env_file values, so never overwrite.
                        environment.setdefault(key, value)
                    elif value is None:
                        # A key without a value: emit the bare key instead of
                        # the literal string "KEY=None".
                        environment.append(key)
                    else:
                        environment.append(f"{key}={value}")

        # 5. Delete build instructions
        service.pop("build", None)

        # 6. Set memory limits
        if name in mem_limits:
            service["mem_limit"] = mem_limits[name]

        # 7. Correct relative paths for volume mounts.
        # Quickstart yaml files are located under quickstart. To get correct
        # paths, need to refer to the parent directory.
        volumes = service.get("volumes") or []
        for index, volume in enumerate(volumes):
            if isinstance(volume, str):
                volumes[index] = _rebase_relative_path(volume)
            elif isinstance(volume, dict) and isinstance(volume.get("source"), str):
                # Long (mapping) volume syntax keeps the host path in "source".
                volume["source"] = _rebase_relative_path(volume["source"])

    # 8. Set docker compose version to 2.3.
    # We need at least this version, since we use features like start_period for
    # healthchecks and shell-like variable interpolation.
    docker_yaml_config["version"] = "2.3"
 | 
						|
 | 
						|
 | 
						|
@click.command()
@click.argument(
    "compose-files",
    nargs=-1,
    # At least one input is needed; without it there is nothing to merge.
    required=True,
    type=click.Path(
        exists=True,
        dir_okay=False,
    ),
)
@click.argument("output-file", type=click.Path())
def generate(compose_files, output_file) -> None:
    """Merge COMPOSE_FILES into OUTPUT_FILE with env variables inlined.

    Each compose file is loaded, rewritten by ``modify_docker_config`` and
    then merged, in the order given, into the first one (later files win on
    conflicting keys). The result is written as block-style YAML, creating
    the output directory if needed.
    """
    # Resolve .env files to inlined vars
    modified_files = []
    for compose_file in compose_files:
        with open(compose_file, "r", encoding="utf-8") as orig_conf:
            # NOTE(review): the full ``Loader`` can construct arbitrary Python
            # objects; only run this on trusted compose files (consider
            # ``yaml.safe_load`` if these ever come from outside the repo).
            docker_config = yaml.load(orig_conf, Loader=Loader)

        # An empty or non-mapping YAML document cannot be processed.
        if not isinstance(docker_config, dict):
            raise click.ClickException(
                f"{compose_file} does not contain a YAML mapping."
            )

        base_path = os.path.dirname(compose_file)
        modify_docker_config(base_path, docker_config)
        modified_files.append(docker_config)

    # Merge services, networks, and volumes maps. The first file is the merge
    # target, so only the remaining ones are merged into it.
    merged_docker_config = modified_files[0]
    for modified_file in modified_files[1:]:
        dict_merge(merged_docker_config, modified_file)

    # Write output file
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_file, "w", encoding="utf-8") as new_conf_file:
        yaml.dump(
            merged_docker_config,
            new_conf_file,
            default_flow_style=False,
        )

    print(f"Successfully generated {output_file}.")
 | 
						|
 | 
						|
 | 
						|
if __name__ == "__main__":
 | 
						|
    generate()
 |