datahub/metadata-ingestion/scripts/custom_package_codegen.py

150 lines
4.2 KiB
Python
Raw Normal View History

import json
import re
import subprocess
import sys
from pathlib import Path
import avro_codegen
import click
# Use the importlib_metadata backport on interpreters older than 3.10 and the
# standard-library importlib.metadata otherwise.
if sys.version_info < (3, 10):
    from importlib_metadata import version
else:
    from importlib.metadata import version

# Version of the avro-gen3 distribution installed in the current environment;
# the generated package pins this exact version in its install_requires.
_avrogen_version = version("avro-gen3")

# Banner written at the top of every Python file emitted by this script.
autogen_header = """# Autogenerated by datahub's custom_package_codegen.py
# DO NOT EDIT THIS FILE DIRECTLY
"""
def python_package_name_normalize(name: str) -> str:
    """Return *name* converted to a lowercase, underscore-separated identifier.

    Every run of one or more ``-``, ``_`` or ``.`` characters is collapsed
    into a single ``_`` and the result is lowercased, e.g.
    ``"My-Custom.Package"`` becomes ``"my_custom_package"``.
    """
    return re.sub(r"[-_.]+", "_", name).lower()
@click.command()
@click.argument(
    "entity_registry", type=click.Path(exists=True, dir_okay=False), required=True
)
@click.argument(
    "pdl_path", type=click.Path(exists=True, file_okay=False), required=True
)
@click.argument(
    "schemas_path", type=click.Path(exists=True, file_okay=False), required=True
)
@click.argument("outdir", type=click.Path(), required=True)
@click.argument("package_name", type=str, required=True)
@click.argument("package_version", type=str, required=True)
@click.option(
    "--build/--no-build",
    is_flag=True,
    help="Build the package after generating it",
    default=True,
)
@click.pass_context
def generate(
    ctx: click.Context,
    entity_registry: str,
    pdl_path: str,
    schemas_path: str,
    outdir: str,
    package_name: str,
    package_version: str,
    build: bool,
) -> None:
    """Generate an installable Python distribution under OUTDIR/PACKAGE_NAME.

    Invokes avro_codegen.generate with ENTITY_REGISTRY, PDL_PATH and
    SCHEMAS_PATH, writing its output into the new package's "metadata"
    subpackage, then writes __init__.py, py.typed, _codegen_config.json and
    setup.py around it.

    OUTDIR must be a relative path, and the package source directory must not
    already exist.

    Unless --no-build is given, the distribution is then built with
    "python -m build" and installation instructions are printed. A failing
    build aborts the command with an error.
    """
    package_path = Path(outdir) / package_name
    if package_path.is_absolute():
        raise click.UsageError("outdir must be a relative path")

    python_package_name = python_package_name_normalize(package_name)
    click.echo(
        f"Generating distribution {package_name} (package name {python_package_name}) at {package_path}"
    )

    # No exist_ok: mkdir raises FileExistsError if this directory is already
    # present, so an earlier generation is never silently overwritten.
    src_path = package_path / "src" / python_package_name
    src_path.mkdir(parents=True)

    # Delegate the actual model code generation to avro_codegen's own command.
    ctx.invoke(
        avro_codegen.generate,
        entity_registry=entity_registry,
        pdl_path=pdl_path,
        schemas_path=schemas_path,
        outdir=str(src_path / "metadata"),
        enable_custom_loader=False,
    )

    (src_path / "__init__.py").write_text(
        f"""{autogen_header}
__package_name__ = "{package_name}"
__version__ = "{package_version}"
"""
    )

    # Empty marker file; it is shipped with the package via package_data below.
    (src_path / "py.typed").write_text("")

    # All setup() keyword arguments live in this JSON file; the generated
    # setup.py simply loads it and forwards the contents to setuptools.
    (src_path / "_codegen_config.json").write_text(
        json.dumps(
            dict(
                name=package_name,
                version=package_version,
                install_requires=[
                    f"avro-gen3=={_avrogen_version}",
                    "acryl-datahub",
                ],
                package_data={
                    f"{python_package_name}": ["py.typed", "_codegen_config.json"],
                    f"{python_package_name}.metadata": ["schema.avsc"],
                    f"{python_package_name}.metadata.schemas": ["*.avsc"],
                },
                entry_points={
                    "datahub.custom_packages": [
                        f"models={python_package_name}.metadata.schema_classes",
                        f"urns={python_package_name}.metadata._urns.urn_defs",
                    ],
                },
            ),
            indent=2,
        )
    )

    (package_path / "setup.py").write_text(
        f"""{autogen_header}
from setuptools import setup
import pathlib
import json
_codegen_config_file = pathlib.Path("./src/{python_package_name}/_codegen_config.json")
setup(**json.loads(_codegen_config_file.read_text()))
"""
    )

    # TODO add a README.md?

    if not build:
        return

    click.echo("Building package...")
    try:
        # sys.executable runs the build with the interpreter executing this
        # script rather than whichever "python" is first on PATH; check=True
        # makes a failed build raise instead of being ignored.
        subprocess.run(
            [sys.executable, "-m", "build", str(package_path)],
            check=True,
        )
    except subprocess.CalledProcessError as e:
        # Stop here so the success message below is never shown for a package
        # that did not actually build.
        raise click.ClickException(
            f"Building {package_path} failed with exit code {e.returncode}"
        ) from e

    click.echo()
    click.secho(f"Generated package at {package_path}", fg="green")
    click.echo(
        "This package should be installed alongside the main acryl-datahub package."
    )
    click.echo()
    click.echo(f"Install the custom package locally with `pip install {package_path}`")
    click.echo(
        "To enable others to use it, share the file at "
        f"{package_path}/dist/{package_name}-{package_version}-py3-none-any.whl "
        "and have them install it with `pip install <wheel file>.whl`"
    )
    click.echo(
        f"Alternatively, publish it to PyPI with `twine upload {package_path}/dist/*`"
    )
if __name__ == "__main__":
    # Run the Click command only when this file is executed as a script.
    generate()