refactor: Generate JSON schema when missing (#3533)

* removed unused script

* print info logs when generating openapi schema

* create json schema only when needed

* fix tests

* Remove leftover

Co-authored-by: ZanSara <sarazanzo94@gmail.com>
This commit is contained in:
Massimiliano Pippi 2022-11-17 11:09:27 +01:00 committed by GitHub
parent 8052632b64
commit 6cd0e337d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 35 additions and 49 deletions

View File

@@ -1,13 +0,0 @@
#!/usr/bin/env python3
# One-shot helper script: regenerate the Haystack pipeline JSON schemas on disk.
import sys
import logging
from pathlib import Path
# Configure logging before importing haystack so INFO-level generation logs are shown.
logging.basicConfig(level=logging.INFO)
# Make the repository root importable when the script is run from a checkout.
sys.path.append(".")
# Import must come after the sys.path/logging setup above.
from haystack.nodes._json_schema import update_json_schema
# Write the schemas into <repo>/haystack/json-schemas (three levels up from this file).
update_json_schema(destination_path=Path(__file__).parent.parent.parent / "haystack" / "json-schemas")

View File

@@ -6,6 +6,11 @@ import os
import sys
import shutil
import logging
logging.basicConfig(level=logging.INFO)
sys.path.append(".")
from rest_api.utils import get_openapi_specs, get_app, get_pipelines # pylint: disable=wrong-import-position
from haystack import __version__ # pylint: disable=wrong-import-position
@@ -17,7 +22,7 @@ DOCS_PATH = Path("./docs") / "_src" / "api" / "openapi"
os.environ["PIPELINE_YAML_PATH"] = PIPELINE_PATH
print(f"Loading OpenAPI specs from {APP_PATH} with pipeline at {PIPELINE_PATH}")
logging.info("Loading OpenAPI specs from %s with pipeline at %s", APP_PATH, PIPELINE_PATH)
# To initialize the app and the pipelines
get_app()

2
.gitignore vendored
View File

@@ -150,6 +150,8 @@ saved_models
*_build
rest_api/file-upload/*
**/feedback_squad_direct.json
haystack/json-schemas
.DS_Store
# http cache (requests-cache)

View File

@@ -1,3 +0,0 @@
*
!.gitignore
!generate_schema.py

View File

@@ -1,23 +0,0 @@
import os
import logging
import sysconfig
from pathlib import Path

from haystack.nodes._json_schema import update_json_schema

logger = logging.getLogger("hatch_autorun")

# Post-install hook: generate the pipeline-validation schema once, right after
# the package is installed, then remove the hook so it never runs again.
try:
    logger.warning(
        "Haystack is generating the YAML schema for Pipelines validation. This only happens once, after installing the package."
    )
    update_json_schema(main_only=True)

    # Destroy the hatch-autorun hook if it exists (needs to run just once after installation)
    try:
        os.remove(Path(sysconfig.get_paths()["purelib"]) / "hatch_autorun_farm_haystack.pth")
    except FileNotFoundError:
        # Hook file already gone — nothing to clean up.
        pass

except Exception:
    # BUG FIX: the original called logger.exception(msg, e), which treats `e` as a
    # %-format argument; with no placeholder in the message this raises a
    # formatting error inside the logging machinery. logger.exception() already
    # appends the active exception's traceback, so no extra argument is needed.
    logger.exception("Could not generate the Haystack Pipeline schemas.")

View File

@@ -1,5 +1,6 @@
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
import os
import sys
import json
import inspect
@@ -176,7 +177,7 @@ def create_schema_for_node_class(node_class: Type[BaseComponent]) -> Tuple[Dict[
node_name = getattr(node_class, "__name__")
logger.info("Creating schema for '%s'", node_name)
logger.debug("Creating schema for '%s'", node_name)
# Read the relevant init parameters from __init__'s signature
init_method = getattr(node_class, "__init__", None)
@@ -405,6 +406,26 @@ def inject_definition_in_schema(node_class: Type[BaseComponent], schema: Dict[st
return schema
def load_schema():
    """
    Generate the json schema if it doesn't exist and load it.

    :return: the parsed pipeline schema as a dict.
    :raises: re-raises whatever ``update_json_schema`` raised if generation
        fails; any partially written schema file is removed first.
    """
    schema_file_path = JSON_SCHEMAS_PATH / "haystack-pipeline-main.schema.json"
    # Use pathlib consistently (the original mixed os.path.exists with Path methods).
    if not schema_file_path.exists():
        # Log through the module logger (the original used the root logger via logging.info).
        logger.info("Json schema not found, generating one at: %s", schema_file_path)
        try:
            update_json_schema(main_only=True)
        except Exception:
            # Be sure not to remain with an empty file if something went wrong
            if schema_file_path.exists():
                schema_file_path.unlink()
            # This error is not recoverable: bare `raise` keeps the original traceback
            raise
    with open(schema_file_path, "r") as schema_file:
        return json.load(schema_file)
def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH, main_only: bool = False):
"""
Create (or update) a new schema.
@@ -413,6 +434,7 @@ def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH, main_only: bo
# commit from `main` or a release branch
filename = f"haystack-pipeline-main.schema.json"
os.makedirs(destination_path, exist_ok=True)
with open(destination_path / filename, "w") as json_file:
json.dump(get_json_schema(filename=filename, version="ignore"), json_file, indent=2)

View File

@@ -14,7 +14,7 @@ from jsonschema.exceptions import ValidationError
from haystack import __version__
from haystack.nodes.base import BaseComponent, RootNode
from haystack.nodes._json_schema import inject_definition_in_schema, JSON_SCHEMAS_PATH
from haystack.nodes._json_schema import load_schema, inject_definition_in_schema
from haystack.errors import PipelineError, PipelineConfigError, PipelineSchemaError
@@ -295,8 +295,8 @@ def validate_schema(pipeline_config: Dict, strict_version_check: bool = False, e
"and fix your configuration accordingly."
)
with open(JSON_SCHEMAS_PATH / f"haystack-pipeline-main.schema.json", "r") as schema_file:
schema = json.load(schema_file)
# Load the json schema, and create one if it doesn't exist yet
schema = load_schema()
# Remove the version value from the schema to prevent validation errors on it - a version only have to be present.
del schema["properties"]["version"]["const"]

View File

@@ -239,10 +239,6 @@ packages = [
"haystack",
]
[tool.hatch.build.targets.wheel.hooks.autorun]
dependencies = ["hatch-autorun"]
file = "haystack/json-schemas/generate_schema.py"
[tool.black]
line-length = 120
skip_magic_trailing_comma = true # For compatibility with pydoc>=4.6, check if still needed.

View File

@@ -42,7 +42,7 @@ def mock_json_schema(request, monkeypatch, tmp_path):
lambda *a, **k: [(conftest, MockDocumentStore), (conftest, MockReader), (conftest, MockRetriever)],
)
# Point the JSON schema path to tmp_path
monkeypatch.setattr(haystack.pipelines.config, "JSON_SCHEMAS_PATH", tmp_path)
monkeypatch.setattr(haystack.nodes._json_schema, "JSON_SCHEMAS_PATH", tmp_path)
# Generate mock schema in tmp_path
filename = f"haystack-pipeline-main.schema.json"