refactor: Generate JSON schema when missing (#3533)

* Remove unused script

* Print info logs when generating the OpenAPI schema

* Create the JSON schema only when needed

* Fix tests

* Remove leftover

Co-authored-by: ZanSara <sarazanzo94@gmail.com>
Massimiliano Pippi 2022-11-17 11:09:27 +01:00 committed by GitHub
parent 8052632b64
commit 6cd0e337d0
9 changed files with 35 additions and 49 deletions


@@ -1,13 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-import logging
-from pathlib import Path
-
-logging.basicConfig(level=logging.INFO)
-
-sys.path.append(".")
-
-from haystack.nodes._json_schema import update_json_schema
-
-update_json_schema(destination_path=Path(__file__).parent.parent.parent / "haystack" / "json-schemas")


@@ -6,6 +6,11 @@ import os
 import sys
 import shutil
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
 sys.path.append(".")
 from rest_api.utils import get_openapi_specs, get_app, get_pipelines  # pylint: disable=wrong-import-position
 from haystack import __version__  # pylint: disable=wrong-import-position
@@ -17,7 +22,7 @@ DOCS_PATH = Path("./docs") / "_src" / "api" / "openapi"
 os.environ["PIPELINE_YAML_PATH"] = PIPELINE_PATH

-print(f"Loading OpenAPI specs from {APP_PATH} with pipeline at {PIPELINE_PATH}")
+logging.info("Loading OpenAPI specs from %s with pipeline at %s", APP_PATH, PIPELINE_PATH)

 # To initialize the app and the pipelines
 get_app()
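The spec generator now reports progress through the logging module instead of print. A minimal standalone sketch of the same pattern (the paths below are illustrative placeholders, not values read from the real script):

import logging

logging.basicConfig(level=logging.INFO)

# Illustrative placeholders; the real script derives these from its repository layout.
app_path = "rest_api/application.py"
pipeline_path = "rest_api/pipeline/pipelines.yaml"

# Passing the values as arguments keeps the message template constant and
# defers string formatting until the INFO level is actually enabled.
logging.info("Loading OpenAPI specs from %s with pipeline at %s", app_path, pipeline_path)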

.gitignore

@@ -150,6 +150,8 @@ saved_models
 *_build
 rest_api/file-upload/*
 **/feedback_squad_direct.json
+haystack/json-schemas
+
 .DS_Store

 # http cache (requests-cache)

haystack/json-schemas/.gitignore

@@ -1,3 +0,0 @@
-*
-!.gitignore
-!generate_schema.py

haystack/json-schemas/generate_schema.py

@@ -1,23 +0,0 @@
-import os
-import logging
-import sysconfig
-from pathlib import Path
-
-from haystack.nodes._json_schema import update_json_schema
-
-logger = logging.getLogger("hatch_autorun")
-
-try:
-    logger.warning(
-        "Haystack is generating the YAML schema for Pipelines validation. This only happens once, after installing the package."
-    )
-    update_json_schema(main_only=True)
-
-    # Destroy the hatch-autorun hook if it exists (needs to run just once after installation)
-    try:
-        os.remove(Path(sysconfig.get_paths()["purelib"]) / "hatch_autorun_farm_haystack.pth")
-    except FileNotFoundError:
-        pass
-
-except Exception as e:
-    logger.exception("Could not generate the Haystack Pipeline schemas.", e)

haystack/nodes/_json_schema.py

@@ -1,5 +1,6 @@
 from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
+import os
 import sys
 import json
 import inspect
@@ -176,7 +177,7 @@ def create_schema_for_node_class(node_class: Type[BaseComponent]) -> Tuple[Dict[
     node_name = getattr(node_class, "__name__")
-    logger.info("Creating schema for '%s'", node_name)
+    logger.debug("Creating schema for '%s'", node_name)

     # Read the relevant init parameters from __init__'s signature
     init_method = getattr(node_class, "__init__", None)
@@ -405,6 +406,26 @@ def inject_definition_in_schema(node_class: Type[BaseComponent], schema: Dict[st
     return schema


+def load_schema():
+    """
+    Generate the json schema if it doesn't exist and load it
+    """
+    schema_file_path = JSON_SCHEMAS_PATH / "haystack-pipeline-main.schema.json"
+    if not os.path.exists(schema_file_path):
+        logging.info("Json schema not found, generating one at: %s", schema_file_path)
+        try:
+            update_json_schema(main_only=True)
+        except Exception as e:
+            # Be sure not to remain with an empty file if something went wrong
+            if schema_file_path.exists():
+                schema_file_path.unlink()
+            # This error is not recoverable
+            raise e
+
+    with open(schema_file_path, "r") as schema_file:
+        return json.load(schema_file)
+
+
 def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH, main_only: bool = False):
     """
     Create (or update) a new schema.
@@ -413,6 +434,7 @@ def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH, main_only: bo
     # commit from `main` or a release branch
     filename = f"haystack-pipeline-main.schema.json"
+    os.makedirs(destination_path, exist_ok=True)
     with open(destination_path / filename, "w") as json_file:
         json.dump(get_json_schema(filename=filename, version="ignore"), json_file, indent=2)
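With load_schema() in place, the schema file behaves like a cache that the first caller creates on demand. A hedged usage sketch (deleting the file simulates a fresh install where no post-install hook ever ran; JSON_SCHEMAS_PATH and load_schema are the names introduced in this file):

from haystack.nodes._json_schema import JSON_SCHEMAS_PATH, load_schema

schema_file = JSON_SCHEMAS_PATH / "haystack-pipeline-main.schema.json"
if schema_file.exists():
    schema_file.unlink()  # pretend the schema was never generated

schema = load_schema()  # regenerates the file here instead of raising FileNotFoundError
print(sorted(schema["properties"]))  # includes "version", which the validation code below relies on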

haystack/pipelines/config.py

@@ -14,7 +14,7 @@ from jsonschema.exceptions import ValidationError
 from haystack import __version__
 from haystack.nodes.base import BaseComponent, RootNode
-from haystack.nodes._json_schema import inject_definition_in_schema, JSON_SCHEMAS_PATH
+from haystack.nodes._json_schema import load_schema, inject_definition_in_schema
 from haystack.errors import PipelineError, PipelineConfigError, PipelineSchemaError
@@ -295,8 +295,8 @@ def validate_schema(pipeline_config: Dict, strict_version_check: bool = False, e
             "and fix your configuration accordingly."
         )

-    with open(JSON_SCHEMAS_PATH / f"haystack-pipeline-main.schema.json", "r") as schema_file:
-        schema = json.load(schema_file)
+    # Load the json schema, and create one if it doesn't exist yet
+    schema = load_schema()

     # Remove the version value from the schema to prevent validation errors on it - a version only have to be present.
     del schema["properties"]["version"]["const"]
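Call sites keep the same API; only the schema loading becomes lazy. A hedged sketch of how validate_schema might be exercised, assuming a minimal config in the dict shape Haystack reads from pipeline YAML files and assuming the referenced node type is available in the local install:

from haystack.pipelines.config import validate_schema

# Minimal illustrative config; real configs are read from a pipeline YAML file.
pipeline_config = {
    "version": "ignore",
    "components": [{"name": "DocJoiner", "type": "JoinDocuments"}],
    "pipelines": [{"name": "query", "nodes": [{"name": "DocJoiner", "inputs": ["Query"]}]}],
}

# The first call generates haystack-pipeline-main.schema.json if it is missing,
# then validates the config against it.
validate_schema(pipeline_config)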

pyproject.toml

@@ -239,10 +239,6 @@ packages = [
   "haystack",
 ]

-[tool.hatch.build.targets.wheel.hooks.autorun]
-dependencies = ["hatch-autorun"]
-file = "haystack/json-schemas/generate_schema.py"
-
 [tool.black]
 line-length = 120
 skip_magic_trailing_comma = true  # For compatibility with pydoc>=4.6, check if still needed.


@@ -42,7 +42,7 @@ def mock_json_schema(request, monkeypatch, tmp_path):
         lambda *a, **k: [(conftest, MockDocumentStore), (conftest, MockReader), (conftest, MockRetriever)],
     )

     # Point the JSON schema path to tmp_path
-    monkeypatch.setattr(haystack.pipelines.config, "JSON_SCHEMAS_PATH", tmp_path)
+    monkeypatch.setattr(haystack.nodes._json_schema, "JSON_SCHEMAS_PATH", tmp_path)
     # Generate mock schema in tmp_path
     filename = f"haystack-pipeline-main.schema.json"
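The fixture now patches JSON_SCHEMAS_PATH in haystack.nodes._json_schema because that is the module that reads it at call time; patching the old location in haystack.pipelines.config would leave that binding untouched. A hedged sketch of a pytest test built on the same idea (the test name and the hand-written schema file are illustrative; the pre-created file keeps load_schema from generating anything outside tmp_path):

import json

import haystack.nodes._json_schema as _json_schema


def test_load_schema_reads_from_patched_path(monkeypatch, tmp_path):
    # Patch the constant in the module that actually dereferences it when load_schema runs.
    monkeypatch.setattr(_json_schema, "JSON_SCHEMAS_PATH", tmp_path)

    # Pre-create a minimal schema file so load_schema() loads it instead of regenerating one.
    (tmp_path / "haystack-pipeline-main.schema.json").write_text(
        json.dumps({"properties": {"version": {"const": "ignore"}}})
    )

    schema = _json_schema.load_schema()
    assert schema["properties"]["version"]["const"] == "ignore"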