mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-03 15:17:58 +00:00
60 lines
1.5 KiB
Python
60 lines
1.5 KiB
Python
import json
|
|
from pathlib import Path
|
|
from typing import Union
|
|
|
|
import click
|
|
from avrogen import write_schema_files
|
|
|
|
# Header prepended to every generated module. It disables flake8 for the file,
# marks the file as autogenerated, and switches black formatting off; the
# matching "# fmt: on" is appended by suppress_checks_in_file().
autogen_header = """# flake8: noqa

# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
# Do not modify manually!

# fmt: off
"""


def suppress_checks_in_file(filepath: Union[str, Path]) -> None:
    """
    Adds a couple lines to the top of an autogenerated file:
        - Comments to suppress flake8 and black.
        - A note stating that the file was autogenerated.

    A closing ``# fmt: on`` line is also appended to the end of the file.

    Args:
        filepath: Path of the file to rewrite in place. The file must
            already exist, since it is opened in ``r+`` mode.

    Raises:
        OSError: If the file cannot be opened for reading and writing.
    """

    with open(filepath, "r+") as f:
        contents = f.read()

        # Keep the closing marker on its own line even when the existing
        # content does not end with a newline; otherwise "# fmt: on" would be
        # glued onto the last line of code as a trailing comment.
        if contents and not contents.endswith("\n"):
            contents += "\n"

        # Rewind and overwrite from the start. The new content is always
        # longer than the old, so nothing stale is left behind.
        f.seek(0, 0)
        f.write(autogen_header)
        f.write(contents)
        f.write("# fmt: on\n")
|
|
|
|
|
|
def _collapse_java_strings(node):
    """Replace ``{"type": "string", "avro.java.string": "String"}`` objects with ``"string"``.

    Walks the parsed JSON structure recursively, so the match does not depend
    on the order in which the two keys appear in the schema file.
    """
    if isinstance(node, dict):
        if node == {"type": "string", "avro.java.string": "String"}:
            return "string"
        return {key: _collapse_java_strings(value) for key, value in node.items()}
    if isinstance(node, list):
        return [_collapse_java_strings(item) for item in node]
    return node


@click.command()
@click.argument("schema_file", type=click.Path(exists=True))
@click.argument("outdir", type=click.Path())
def generate(schema_file: str, outdir: str) -> None:
    """Generate Python files in OUTDIR from the Avro schema in SCHEMA_FILE.

    The schema is loaded as JSON, Java-specific string annotations are
    collapsed to the plain "string" type, and the re-serialized schema is
    handed to avrogen's write_schema_files. Every ``*.py`` file found under
    OUTDIR afterwards gets the autogenerated header.
    """
    with open(schema_file) as f:
        raw_schema_text = f.read()

    # Collapse the Java string annotation structurally instead of by text
    # replacement on the serialized JSON, which only matched one key order.
    schema = _collapse_java_strings(json.loads(raw_schema_text))
    redo_spaces = json.dumps(schema, indent=2)

    write_schema_files(redo_spaces, outdir)

    out_path = Path(outdir)

    # Truncate this file.
    (out_path / "__init__.py").write_text("")

    # Add headers for all generated files
    for file in out_path.glob("**/*.py"):
        suppress_checks_in_file(file)
|
|
|
|
|
|
if __name__ == "__main__":
    # Invoke the click command only when run as a script; click reads
    # SCHEMA_FILE and OUTDIR from the command line.
    generate()
|