mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-11-01 10:19:23 +00:00
refactor: Generate JSON schema when missing (#3533)
* removed unused script * print info logs when generating openapi schema * create json schema only when needed * fix tests * Remove leftover Co-authored-by: ZanSara <sarazanzo94@gmail.com>
This commit is contained in:
parent
8052632b64
commit
6cd0e337d0
13
.github/utils/generate_json_schema.py
vendored
13
.github/utils/generate_json_schema.py
vendored
@ -1,13 +0,0 @@
|
||||
#!/usr/bin/env python3
"""Write the Haystack Pipeline JSON schema into the `haystack/json-schemas` folder."""

import sys
import logging
from pathlib import Path

logging.basicConfig(level=logging.INFO)

# "." must be on sys.path before the haystack import below is attempted,
# so the order of these two statements matters.
sys.path.append(".")
from haystack.nodes._json_schema import update_json_schema

# Three levels above this file, then down into haystack/json-schemas
three_levels_up = Path(__file__).parent.parent.parent
schemas_dir = three_levels_up / "haystack" / "json-schemas"

update_json_schema(destination_path=schemas_dir)
|
||||
7
.github/utils/generate_openapi_specs.py
vendored
7
.github/utils/generate_openapi_specs.py
vendored
@ -6,6 +6,11 @@ import os
|
||||
import sys
|
||||
import shutil
|
||||
|
||||
import logging
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
|
||||
sys.path.append(".")
|
||||
from rest_api.utils import get_openapi_specs, get_app, get_pipelines # pylint: disable=wrong-import-position
|
||||
from haystack import __version__ # pylint: disable=wrong-import-position
|
||||
@ -17,7 +22,7 @@ DOCS_PATH = Path("./docs") / "_src" / "api" / "openapi"
|
||||
|
||||
os.environ["PIPELINE_YAML_PATH"] = PIPELINE_PATH
|
||||
|
||||
print(f"Loading OpenAPI specs from {APP_PATH} with pipeline at {PIPELINE_PATH}")
|
||||
logging.info("Loading OpenAPI specs from %s with pipeline at %s", APP_PATH, PIPELINE_PATH)
|
||||
|
||||
# To initialize the app and the pipelines
|
||||
get_app()
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@ -150,6 +150,8 @@ saved_models
|
||||
*_build
|
||||
rest_api/file-upload/*
|
||||
**/feedback_squad_direct.json
|
||||
haystack/json-schemas
|
||||
|
||||
.DS_Store
|
||||
|
||||
# http cache (requests-cache)
|
||||
|
||||
3
haystack/json-schemas/.gitignore
vendored
3
haystack/json-schemas/.gitignore
vendored
@ -1,3 +0,0 @@
|
||||
*
|
||||
!.gitignore
|
||||
!generate_schema.py
|
||||
@ -1,23 +0,0 @@
|
||||
import os
|
||||
import logging
|
||||
import sysconfig
|
||||
from pathlib import Path
|
||||
|
||||
from haystack.nodes._json_schema import update_json_schema
|
||||
|
||||
logger = logging.getLogger("hatch_autorun")

# Post-install hook body: generate the Pipeline schema once, then remove the
# hatch-autorun .pth file so the hook does not run again.
try:
    logger.warning(
        "Haystack is generating the YAML schema for Pipelines validation. This only happens once, after installing the package."
    )
    update_json_schema(main_only=True)

    # Destroy the hatch-autorun hook if it exists (needs to run just once after installation)
    try:
        os.remove(Path(sysconfig.get_paths()["purelib"]) / "hatch_autorun_farm_haystack.pth")
    except FileNotFoundError:
        # The hook file is already gone: nothing to clean up
        pass

except Exception:
    # Best effort: a failure here is logged rather than raised.
    # logger.exception() already attaches the active exception and its traceback,
    # so the exception must not be passed as an extra positional argument: the
    # message has no %-placeholder and the record would fail to format.
    logger.exception("Could not generate the Haystack Pipeline schemas.")
|
||||
@ -1,5 +1,6 @@
|
||||
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import inspect
|
||||
@ -176,7 +177,7 @@ def create_schema_for_node_class(node_class: Type[BaseComponent]) -> Tuple[Dict[
|
||||
|
||||
node_name = getattr(node_class, "__name__")
|
||||
|
||||
logger.info("Creating schema for '%s'", node_name)
|
||||
logger.debug("Creating schema for '%s'", node_name)
|
||||
|
||||
# Read the relevant init parameters from __init__'s signature
|
||||
init_method = getattr(node_class, "__init__", None)
|
||||
@ -405,6 +406,26 @@ def inject_definition_in_schema(node_class: Type[BaseComponent], schema: Dict[st
|
||||
return schema
|
||||
|
||||
|
||||
def load_schema():
    """
    Load the main Pipeline JSON schema, generating it first if the file doesn't exist.

    The schema is read from `JSON_SCHEMAS_PATH / "haystack-pipeline-main.schema.json"`.
    If that file is missing, `update_json_schema(main_only=True)` is called to create it.

    :return: The content of the schema file, as parsed by `json.load`.
    :raises Exception: Whatever `update_json_schema` raised, re-raised unchanged after
        removing any schema file left behind by the failed generation.
    """
    schema_file_path = JSON_SCHEMAS_PATH / "haystack-pipeline-main.schema.json"

    if not schema_file_path.exists():
        logger.info("Json schema not found, generating one at: %s", schema_file_path)
        try:
            update_json_schema(main_only=True)
        except Exception:
            # Be sure not to remain with an empty file if something went wrong
            if schema_file_path.exists():
                schema_file_path.unlink()
            # This error is not recoverable: re-raise it with its original traceback
            raise

    with open(schema_file_path, "r", encoding="utf-8") as schema_file:
        return json.load(schema_file)
|
||||
|
||||
|
||||
def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH, main_only: bool = False):
|
||||
"""
|
||||
Create (or update) a new schema.
|
||||
@ -413,6 +434,7 @@ def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH, main_only: bo
|
||||
# commit from `main` or a release branch
|
||||
filename = f"haystack-pipeline-main.schema.json"
|
||||
|
||||
os.makedirs(destination_path, exist_ok=True)
|
||||
with open(destination_path / filename, "w") as json_file:
|
||||
json.dump(get_json_schema(filename=filename, version="ignore"), json_file, indent=2)
|
||||
|
||||
|
||||
@ -14,7 +14,7 @@ from jsonschema.exceptions import ValidationError
|
||||
|
||||
from haystack import __version__
|
||||
from haystack.nodes.base import BaseComponent, RootNode
|
||||
from haystack.nodes._json_schema import inject_definition_in_schema, JSON_SCHEMAS_PATH
|
||||
from haystack.nodes._json_schema import load_schema, inject_definition_in_schema
|
||||
from haystack.errors import PipelineError, PipelineConfigError, PipelineSchemaError
|
||||
|
||||
|
||||
@ -295,8 +295,8 @@ def validate_schema(pipeline_config: Dict, strict_version_check: bool = False, e
|
||||
"and fix your configuration accordingly."
|
||||
)
|
||||
|
||||
with open(JSON_SCHEMAS_PATH / f"haystack-pipeline-main.schema.json", "r") as schema_file:
|
||||
schema = json.load(schema_file)
|
||||
# Load the json schema, and create one if it doesn't exist yet
|
||||
schema = load_schema()
|
||||
|
||||
# Remove the version value from the schema to prevent validation errors on it - a version only have to be present.
|
||||
del schema["properties"]["version"]["const"]
|
||||
|
||||
@ -239,10 +239,6 @@ packages = [
|
||||
"haystack",
|
||||
]
|
||||
|
||||
[tool.hatch.build.targets.wheel.hooks.autorun]
|
||||
dependencies = ["hatch-autorun"]
|
||||
file = "haystack/json-schemas/generate_schema.py"
|
||||
|
||||
[tool.black]
|
||||
line-length = 120
|
||||
skip_magic_trailing_comma = true # For compatibility with pydoc>=4.6, check if still needed.
|
||||
|
||||
@ -42,7 +42,7 @@ def mock_json_schema(request, monkeypatch, tmp_path):
|
||||
lambda *a, **k: [(conftest, MockDocumentStore), (conftest, MockReader), (conftest, MockRetriever)],
|
||||
)
|
||||
# Point the JSON schema path to tmp_path
|
||||
monkeypatch.setattr(haystack.pipelines.config, "JSON_SCHEMAS_PATH", tmp_path)
|
||||
monkeypatch.setattr(haystack.nodes._json_schema, "JSON_SCHEMAS_PATH", tmp_path)
|
||||
|
||||
# Generate mock schema in tmp_path
|
||||
filename = f"haystack-pipeline-main.schema.json"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user