import json
import re
import subprocess
import sys
from pathlib import Path

import avro_codegen
import click

if sys.version_info < (3, 10):
    from importlib_metadata import version
else:
    from importlib.metadata import version

# The generated package pins avro-gen3 to the exact version installed here.
_avrogen_version = version("avro-gen3")

autogen_header = """# Autogenerated by datahub's custom_package_codegen.py
# DO NOT EDIT THIS FILE DIRECTLY
"""


def python_package_name_normalize(name: str) -> str:
    """Return *name* lowercased, with each run of ``-``, ``_`` or ``.`` replaced by ``_``."""
    return re.sub(r"[-_.]+", "_", name).lower()


def _find_built_wheel(
    package_path: Path, python_package_name: str, package_version: str
) -> Path:
    """Return the newest wheel under ``dist/``, or the expected wheel path if none is found."""
    dist_dir = package_path / "dist"
    # Prefer what the build actually produced: the build backend may normalize
    # the name and version differently than we would guess.
    wheels = sorted(dist_dir.glob("*.whl"), key=lambda p: p.stat().st_mtime)
    if wheels:
        return wheels[-1]
    # Wheel file names use the escaped distribution name, never the raw one.
    return dist_dir / f"{python_package_name}-{package_version}-py3-none-any.whl"


@click.command()
@click.argument(
    "entity_registry", type=click.Path(exists=True, dir_okay=False), required=True
)
@click.argument(
    "pdl_path", type=click.Path(exists=True, file_okay=False), required=True
)
@click.argument(
    "schemas_path", type=click.Path(exists=True, file_okay=False), required=True
)
@click.argument("outdir", type=click.Path(), required=True)
@click.argument("package_name", type=str, required=True)
@click.argument("package_version", type=str, required=True)
@click.option(
    "--build/--no-build",
    is_flag=True,
    help="Build the package after generating it",
    default=True,
)
@click.pass_context
def generate(
    ctx: click.Context,
    entity_registry: str,
    pdl_path: str,
    schemas_path: str,
    outdir: str,
    package_name: str,
    package_version: str,
    build: bool,
) -> None:
    """Generate a custom models package under OUTDIR/PACKAGE_NAME.

    ENTITY_REGISTRY, PDL_PATH and SCHEMAS_PATH are forwarded to the
    avro_codegen generator, which writes into the new package's "metadata"
    directory. OUTDIR must be a relative path, and the package source
    directory must not already exist. Unless --no-build is given, the
    package is then built with "python -m build"; a failed build aborts
    the command with a non-zero exit status.
    """
    package_path = Path(outdir) / package_name
    if package_path.is_absolute():
        raise click.UsageError("outdir must be a relative path")

    python_package_name = python_package_name_normalize(package_name)
    click.echo(
        f"Generating distribution {package_name} (package name {python_package_name}) at {package_path}"
    )

    src_path = package_path / "src" / python_package_name
    # No exist_ok: this raises FileExistsError if the directory already exists.
    src_path.mkdir(parents=True)

    # Reuse the avro_codegen click command to emit the model classes.
    ctx.invoke(
        avro_codegen.generate,
        entity_registry=entity_registry,
        pdl_path=pdl_path,
        schemas_path=schemas_path,
        outdir=str(src_path / "metadata"),
        enable_custom_loader=False,
    )

    (src_path / "__init__.py").write_text(
        f"""{autogen_header}
__package_name__ = "{package_name}"
__version__ = "{package_version}"
"""
    )

    (src_path / "py.typed").write_text("")

    # The generated setup.py reads this file back and passes it to setup(),
    # so its keys must be valid setuptools.setup() keyword arguments.
    codegen_config = {
        "name": package_name,
        "version": package_version,
        "install_requires": [
            f"avro-gen3=={_avrogen_version}",
            "acryl-datahub",
        ],
        "package_data": {
            f"{python_package_name}": ["py.typed", "_codegen_config.json"],
            f"{python_package_name}.metadata": ["schema.avsc"],
            f"{python_package_name}.metadata.schemas": ["*.avsc"],
        },
        "entry_points": {
            "datahub.custom_packages": [
                f"models={python_package_name}.metadata.schema_classes",
                f"urns={python_package_name}.metadata._urns.urn_defs",
            ],
        },
    }
    (src_path / "_codegen_config.json").write_text(
        json.dumps(codegen_config, indent=2)
    )

    (package_path / "setup.py").write_text(
        f"""{autogen_header}
from setuptools import setup

import pathlib
import json

_codegen_config_file = pathlib.Path("./src/{python_package_name}/_codegen_config.json")

setup(**json.loads(_codegen_config_file.read_text()))
"""
    )

    # TODO add a README.md?

    if not build:
        return

    click.echo("Building package...")
    # Use the running interpreter rather than whatever "python" is on PATH,
    # and do not report success when the build itself failed.
    build_result = subprocess.run(
        [sys.executable, "-m", "build", str(package_path)]
    )
    if build_result.returncode != 0:
        raise click.ClickException(
            f"Building {package_path} failed with exit code {build_result.returncode}"
        )

    wheel_path = _find_built_wheel(package_path, python_package_name, package_version)

    click.echo()
    click.secho(f"Generated package at {package_path}", fg="green")
    click.echo(
        "This package should be installed alongside the main acryl-datahub package."
    )
    click.echo()
    click.echo(f"Install the custom package locally with `pip install {package_path}`")
    click.echo(
        "To enable others to use it, share the file at "
        f"{wheel_path} "
        "and have them install it with `pip install <wheel file>.whl`"
    )
    click.echo(
        f"Alternatively, publish it to PyPI with `twine upload {package_path}/dist/*`"
    )


if __name__ == "__main__":
    generate()