mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-10-30 18:26:58 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			246 lines
		
	
	
		
			7.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			246 lines
		
	
	
		
			7.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from io import StringIO
 | |
| from typing import List
 | |
| import os
 | |
| import sys
 | |
| import pathlib
 | |
| from collections.abc import Mapping
 | |
| 
 | |
| import click
 | |
| import yaml
 | |
| from dotenv import dotenv_values
 | |
| from yaml import Loader
 | |
| 
 | |
# Input stacks shared by several quickstart targets. The "m1" targets below
# reuse these and append one extra override file.
_DEFAULT_STACK = [
    "../docker-compose.yml",
    "../docker-compose.override.yml",
]
_WITHOUT_NEO4J_STACK = [
    "../docker-compose-without-neo4j.yml",
    "../docker-compose-without-neo4j.override.yml",
]

# Maps each generated quickstart file to the ordered list of compose files
# that are merged to produce it. Insertion order is the generation order.
COMPOSE_SPECS = {
    "docker-compose.quickstart.yml": [*_DEFAULT_STACK],
    "docker-compose-m1.quickstart.yml": [
        *_DEFAULT_STACK,
        "../docker-compose.m1.yml",
    ],
    "docker-compose-without-neo4j.quickstart.yml": [*_WITHOUT_NEO4J_STACK],
    "docker-compose-without-neo4j-m1.quickstart.yml": [
        *_WITHOUT_NEO4J_STACK,
        "../docker-compose-without-neo4j.m1.yml",
    ],
    "docker-compose.monitoring.quickstart.yml": [
        "../monitoring/docker-compose.monitoring.yml",
    ],
    "docker-compose.consumers.quickstart.yml": [
        "../docker-compose.consumers.yml",
    ],
    "docker-compose.consumers-without-neo4j.quickstart.yml": [
        "../docker-compose.consumers-without-neo4j.yml",
    ],
    "docker-compose.kafka-setup.quickstart.yml": [
        "../docker-compose.kafka-setup.yml",
    ],
}

# Services removed from every generated quickstart file.
omitted_services = ["kafka-rest-proxy", "kafka-topics-ui", "schema-registry-ui", "kibana"]

# Per-service memory ceilings written into the generated "deploy" section.
# These are upper bounds: a container exceeding its limit is killed. Each
# service is configured to use much less Java heap space than allocated here.
mem_limits = {"elasticsearch": "1G"}
 | |
| 
 | |
| 
 | |
def dict_merge(dct, merge_dct):
    """Recursively merge ``merge_dct`` into ``dct`` in place.

    Merge rules, applied per key of ``merge_dct``:

    * dict in ``dct`` and mapping in ``merge_dct``: merged recursively.
    * list in both: replaced by the sorted union of the two lists, unless
      both already contain the same set of items, in which case the existing
      list (and its order) is kept. If the items are unhashable (for example
      mapping-style list entries), the incoming items that are not already
      present are appended instead, preserving order.
    * anything else (including a list being overridden by a non-list value):
      the value from ``merge_dct`` replaces the one in ``dct``.

    Args:
        dct: Dictionary that is updated in place.
        merge_dct: Mapping whose entries are merged into ``dct``.

    Returns:
        None. ``dct`` is mutated.
    """
    for key, incoming in merge_dct.items():
        current = dct.get(key)
        if isinstance(current, dict) and isinstance(incoming, Mapping):
            dict_merge(current, incoming)
        elif isinstance(current, list) and isinstance(incoming, list):
            try:
                existing_items = set(current)
                incoming_items = set(incoming)
            except TypeError:
                # Unhashable entries cannot go through a set; fall back to an
                # order-preserving union based on equality.
                dct[key] = current + [item for item in incoming if item not in current]
                continue
            if existing_items != incoming_items:
                dct[key] = sorted(existing_items | incoming_items)
        else:
            # Scalars, new keys, and type changes: the incoming value wins.
            # Requiring a list on both sides above keeps a string override
            # from being split into characters by set().
            dct[key] = incoming
 | |
| 
 | |
| 
 | |
def modify_docker_config(base_path, docker_yaml_config):
    """Rewrite one parsed compose file in place so it can be merged and inlined.

    For the ``services`` section this:

    0. removes every service listed in ``omitted_services``;
    1. inlines the variables of each ``env_file`` into the service's
       ``environment`` list and removes the ``env_file`` key;
    2. removes ``build`` instructions;
    3. sets ``deploy.resources.limits.memory`` for services in ``mem_limits``;
    4. prefixes relative volume paths with one extra parent directory, since
       the generated files live one directory below the source files.

    Args:
        base_path: Directory of the compose file; ``env_file`` paths are
            joined onto it.
        docker_yaml_config: Parsed compose document (a dict). Mutated in place.

    Returns:
        None.
    """
    services = docker_yaml_config.get("services")
    if not services:
        # Covers both a missing key and an empty/null "services:" entry.
        services = {}
        docker_yaml_config["services"] = services

    # 0. Filter out services to be omitted. The names are collected first
    # because deleting from a dict while iterating it raises RuntimeError.
    for omitted in [name for name in services if name in omitted_services]:
        del services[omitted]

    for name, service in services.items():
        # 1. Extract the env file pointer (a single path or a list of paths).
        env_file = service.get("env_file")

        if env_file is not None:
            env_file_entries = [env_file] if isinstance(env_file, str) else list(env_file)

            # 2. Normalise "environment" to the list form so entries can be
            # appended: create it when absent/null, convert a mapping to
            # "KEY=value" (or bare "KEY" for null values) entries.
            environment = service.get("environment")
            if environment is None:
                environment = []
            elif isinstance(environment, dict):
                environment = [
                    f"{key}" if value is None else f"{key}={value}"
                    for key, value in environment.items()
                ]
            service["environment"] = environment

            for entry in env_file_entries:
                # 3. Construct the full .env path and resolve its values.
                env_vars = dotenv_values(os.path.join(base_path, entry))

                # 4. Append the resolved values to the "environment" block.
                for key, value in env_vars.items():
                    if value is not None:
                        environment.append(f"{key}={value}")
                    else:
                        environment.append(f"{key}")

            # 5. Delete the "env_file" value now that it has been inlined.
            del service["env_file"]

        # 6. Delete build instructions.
        service.pop("build", None)

        # 7. Set memory limits (replaces any existing "deploy" section).
        if name in mem_limits:
            service["deploy"] = {"resources": {"limits": {"memory": mem_limits[name]}}}

        # 8. Correct relative paths for volume mounts. Quickstart yaml files
        # are located under quickstart, so relative paths need to refer to
        # the parent directory. Non-string entries are left untouched.
        volumes = service.get("volumes") or []
        for index, volume in enumerate(volumes):
            if not isinstance(volume, str):
                continue
            if volume.startswith("../"):
                volumes[index] = "../" + volume
            elif volume.startswith("./"):
                volumes[index] = "." + volume
 | |
| 
 | |
| 
 | |
def dedup_env_vars(merged_docker_config):
    """Remove duplicate environment variables from every service, in place.

    Only list-form ``environment`` sections are processed; a mapping-form or
    null section is left as it is. Entries are keyed by the variable name,
    i.e. the text before the FIRST ``=``, so values that themselves contain
    ``=`` (such as JVM ``-Dkey=value`` options) are still recognised as the
    same variable. The first entry seen for a name is kept.

    When deduplication changes the set of entries, the list is rewritten
    sorted by variable name; otherwise the existing list and its order are
    kept.

    NOTE(review): entries without ``=`` (bare ``KEY`` pass-through variables)
    are not kept when the list is rewritten. This matches the previous
    behaviour; confirm that dropping them is intended.

    Args:
        merged_docker_config: Merged compose document with a ``services``
            mapping. Mutated in place.

    Returns:
        None.
    """
    for service in merged_docker_config["services"].values():
        entries = service.get("environment")
        if not isinstance(entries, list):
            continue

        first_by_name = {}
        for entry in entries:
            var_name, separator, _ = entry.partition("=")
            if separator and var_name not in first_by_name:
                first_by_name[var_name] = entry

        if set(entries) != set(first_by_name.values()):
            service["environment"] = [first_by_name[var_name] for var_name in sorted(first_by_name)]
 | |
| 
 | |
| 
 | |
def merge_files(compose_files: List[str]) -> str:
    """Merge several compose files into one YAML document with env vars inlined.

    Each file is loaded, rewritten by ``modify_docker_config`` (env files
    inlined, build instructions removed, ...), and then merged into the first
    file with ``dict_merge`` in the order given. Finally duplicate environment
    variables are removed with ``dedup_env_vars``.

    Args:
        compose_files: Paths of the compose files to merge, base file first.

    Returns:
        The merged compose document serialised as YAML text.

    Raises:
        ValueError: If ``compose_files`` is empty.
    """
    if not compose_files:
        raise ValueError("At least one compose file is required.")

    # Resolve .env files to inlined vars
    modified_files = []
    for compose_file in compose_files:
        with open(compose_file, "r") as orig_conf:
            # NOTE(review): yaml.load with the full Loader can construct
            # arbitrary Python objects. The inputs here are the repository's
            # own compose files; switch to a safe loader if that ever changes.
            docker_config = yaml.load(orig_conf, Loader=Loader)

        base_path = os.path.dirname(compose_file)
        modify_docker_config(base_path, docker_config)
        modified_files.append(docker_config)

    # Merge services, networks, and volumes maps. The first file is the merge
    # target, so only the remaining files are merged into it.
    merged_docker_config, *overrides = modified_files
    for override in overrides:
        dict_merge(merged_docker_config, override)

    # Remove duplicate env vars
    dedup_env_vars(merged_docker_config)

    # Generate yaml to string.
    out = StringIO()
    yaml.dump(
        merged_docker_config,
        out,
        default_flow_style=False,
        width=1000,
    )
    return out.getvalue()
 | |
| 
 | |
| 
 | |
@click.group()
def main_cmd() -> None:
    # Root click group; it does no work itself and only hosts the
    # subcommands registered on it via @main_cmd.command().
    return None
 | |
| 
 | |
| 
 | |
@main_cmd.command()
@click.argument(
    "compose-files",
    nargs=-1,
    # A variadic argument is optional by default; require at least one input
    # so an empty invocation produces a usage error instead of a traceback.
    required=True,
    type=click.Path(
        exists=True,
        dir_okay=False,
    ),
)
@click.argument("output-file", type=click.Path())
def generate_one(compose_files, output_file) -> None:
    """
    Generates a merged docker-compose file with env variables inlined.

    Example Usage: python3 generate_docker_quickstart.py generate-one ../docker-compose.yml ../docker-compose.override.yml ../docker-compose-gen.yml
    """
    # compose_files: one or more existing compose files, base file first.
    # output_file: path of the merged file; parent directories are created.
    merged_contents = merge_files(compose_files)

    # Write output file
    output_path = pathlib.Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(merged_contents)

    print(f"Successfully generated {output_file}.")
 | |
| 
 | |
| 
 | |
@main_cmd.command()
@click.pass_context
def generate_all(ctx: click.Context) -> None:
    """
    Generates all merged docker-compose files with env variables inlined.
    """
    # Run the generate-one command once per COMPOSE_SPECS entry, passing the
    # entry's input list and using its key as the output path.
    for target in COMPOSE_SPECS:
        ctx.invoke(
            generate_one,
            compose_files=COMPOSE_SPECS[target],
            output_file=target,
        )
 | |
| 
 | |
| 
 | |
@main_cmd.command()
def check_all() -> None:
    """
    Checks that the generated docker-compose files are up to date.
    """
    # Every entry of COMPOSE_SPECS is regenerated in memory and compared with
    # the file on disk. All stale files are reported before exiting with
    # status 1, so a single run lists everything that needs regenerating.
    found_stale = False

    for output_compose_file, inputs in COMPOSE_SPECS.items():
        expected = merge_files(inputs)

        # Check that the files match. A generated file that does not exist
        # yet is treated as out of date rather than raising.
        try:
            current = pathlib.Path(output_compose_file).read_text()
        except FileNotFoundError:
            current = None

        if expected != current:
            print(
                f"File {output_compose_file} is out of date. Please run `python3 generate_docker_quickstart.py generate-all`."
            )
            found_stale = True

    if found_stale:
        sys.exit(1)
 | |
| 
 | |
| 
 | |
if __name__ == "__main__":
    # Script entry point: hand control to the click command group.
    main_cmd()
 | 
