feat: hatch-autorun generates schemas (#3484)

* hatch-autorun generates the schemas

* fix path

* keep schemas for now

* fix path

* schemas

* Do not generate rc schemas

* make the autorun hook self-destroy

* typo

* schemas

* schemas were ok

* improve logs to make generate_schema.py usable standalone too

* fix warning

* Update warning

* Update generate_schema.py

* black
Sara Zan 2022-10-28 13:55:11 +02:00 committed by GitHub
parent 1f9f4ab03a
commit a66e7caa34
8 changed files with 13252 additions and 19571 deletions

haystack/json-schemas/generate_schema.py

@@ -0,0 +1,23 @@
import os
import logging
import sysconfig
from pathlib import Path

from haystack.nodes._json_schema import update_json_schema

logger = logging.getLogger("hatch_autorun")

try:
    logger.warning(
        "Haystack is generating the YAML schema for Pipelines validation. This only happens once, after installing the package."
    )
    update_json_schema(main_only=True)

    # Destroy the hatch-autorun hook if it exists (needs to run just once after installation)
    try:
        os.remove(Path(sysconfig.get_paths()["purelib"]) / "hatch_autorun_farm_haystack.pth")
    except FileNotFoundError:
        pass

except Exception as e:
    logger.exception("Could not generate the Haystack Pipeline schemas: %s", e)
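
For reference, the same generation the hook performs can also be run by hand, for example if the one-shot hook never fired or has already removed itself. A minimal sketch, assuming a Haystack install that ships the main_only flag introduced in this commit:

from haystack.nodes._json_schema import update_json_schema

# Regenerate only the "main" pipeline schema, exactly as the autorun hook does.
update_json_schema(main_only=True)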

Four file diffs suppressed because they are too large.

haystack/json-schemas/haystack-pipeline.schema.json

@@ -254,6 +254,20 @@
"$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/main/haystack/json-schemas/haystack-pipeline-1.10.0.schema.json"
}
]
},
{
"allOf": [
{
"properties": {
"version": {
"const": "1.11.0rc0"
}
}
},
{
"$ref": "https://raw.githubusercontent.com/deepset-ai/haystack-json-schema/main/json-schema/haystack-pipeline-1.11.0rc0.schema.json"
}
]
}
],
"title": "Haystack Pipeline",

haystack/nodes/_json_schema.py

@@ -24,11 +24,10 @@ from haystack.nodes.base import BaseComponent
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
JSON_SCHEMAS_PATH = Path(__file__).parent.parent.parent / "haystack" / "json-schemas"
SCHEMA_URL = "https://raw.githubusercontent.com/deepset-ai/haystack/main/haystack/json-schemas/"
SCHEMA_URL = "https://raw.githubusercontent.com/deepset-ai/haystack-json-schema/main/json-schema/"
# Allows accessory classes (like enums and helpers) to be registered as valid input for
# custom node's init parameters. For now we disable this feature, but flipping this variables
@@ -406,35 +405,35 @@ def inject_definition_in_schema(node_class: Type[BaseComponent], schema: Dict[st
    return schema
def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH):
def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH, main_only: bool = False):
    """
    Create (or update) a new schema.
    """
    # `main` schema is always updated and will contain the same data as the latest
    # commit from `main` or a release branch
    filename = f"haystack-pipeline-main.schema.json"
    with open(destination_path / filename, "w") as json_file:
        json.dump(get_json_schema(filename=filename, version="ignore"), json_file, indent=2, sort_keys=True)
    # Create/update the specific version file too
    filename = f"haystack-pipeline-{haystack_version}.schema.json"
    with open(destination_path / filename, "w") as json_file:
        json.dump(get_json_schema(filename=filename, version=haystack_version), json_file, indent=2, sort_keys=True)
        json.dump(get_json_schema(filename=filename, version="ignore"), json_file, indent=2)
    # Update the index
    index_name = "haystack-pipeline.schema.json"
    with open(destination_path / index_name, "r") as json_file:
        index = json.load(json_file)
        new_entry = {
            "allOf": [
                {"properties": {"version": {"const": haystack_version}}},
                {
                    "$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/main/haystack/json-schemas/"
                    f"haystack-pipeline-{haystack_version}.schema.json"
                },
            ]
        }
        if new_entry not in index["oneOf"]:
            index["oneOf"].append(new_entry)
    with open(destination_path / index_name, "w") as json_file:
        json.dump(obj=index, fp=json_file, indent=2, sort_keys=True)
    if not main_only and "rc" not in haystack_version:
        # Create/update the specific version file too
        filename = f"haystack-pipeline-{haystack_version}.schema.json"
        with open(destination_path / filename, "w") as json_file:
            json.dump(get_json_schema(filename=filename, version=haystack_version), json_file, indent=2)
        # Update the index
        index_name = "haystack-pipeline.schema.json"
        with open(destination_path / index_name, "r") as json_file:
            index = json.load(json_file)
            new_entry = {
                "allOf": [
                    {"properties": {"version": {"const": haystack_version}}},
                    {"$ref": f"{SCHEMA_URL}haystack-pipeline-{haystack_version}.schema.json"},
                ]
            }
            if new_entry not in index["oneOf"]:
                index["oneOf"].append(new_entry)
        with open(destination_path / index_name, "w") as json_file:
            json.dump(obj=index, fp=json_file, indent=2, sort_keys=True)
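
The effect of the new guard is that versioned schema files and index entries are only written for final releases, and only when a full update is requested. A small sketch restating that condition:

# Minimal restatement of the guard added above.
def writes_versioned_schema(haystack_version: str, main_only: bool = False) -> bool:
    return not main_only and "rc" not in haystack_version

assert writes_versioned_schema("1.11.0")                      # final release: versioned schema + index entry
assert not writes_versioned_schema("1.11.0rc0")               # release candidate: main schema only
assert not writes_versioned_schema("1.11.0", main_only=True)  # autorun hook path: main schema only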

pyproject.toml

@@ -245,6 +245,10 @@ packages = [
  "haystack",
]

[tool.hatch.build.targets.wheel.hooks.autorun]
dependencies = ["hatch-autorun"]
file = "haystack/json-schemas/generate_schema.py"

[tool.black]
line-length = 120
skip_magic_trailing_comma = true # For compatibility with pydoc>=4.6, check if still needed.
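
Since the hook deletes its own .pth trigger after the first run, whether schema generation is still pending in a given environment can be checked roughly like this (the file name is the one targeted by generate_schema.py above; exact hatch-autorun behavior may vary):

import sysconfig
from pathlib import Path

# If the hatch-autorun .pth file is still in site-packages, the one-time
# schema generation has not yet run and removed itself.
pth = Path(sysconfig.get_paths()["purelib"]) / "hatch_autorun_farm_haystack.pth"
print("schema generation pending:", pth.exists())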