mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-11-17 02:24:46 +00:00
refactor: Generate JSON schema when missing (#3533)
* removed unused script * print info logs when generating openapi schema * create json schema only when needed * fix tests * Remove leftover Co-authored-by: ZanSara <sarazanzo94@gmail.com>
This commit is contained in:
parent
8052632b64
commit
6cd0e337d0
13
.github/utils/generate_json_schema.py
vendored
13
.github/utils/generate_json_schema.py
vendored
@ -1,13 +0,0 @@
|
|||||||
#!/usr/bin/env python3
# Utility script: writes the Haystack pipeline JSON schema into the
# `haystack/json-schemas` folder, located three directory levels above this file.
import logging
import sys
from pathlib import Path

# Show INFO-level messages on the console while the script runs.
logging.basicConfig(level=logging.INFO)

# Make the current working directory importable before importing `haystack`.
sys.path.append(".")
from haystack.nodes._json_schema import update_json_schema

# Three `.parent` hops: this file -> its folder -> that folder's parent -> one level further up.
base_dir = Path(__file__).parent.parent.parent
update_json_schema(destination_path=base_dir / "haystack" / "json-schemas")
|
|
||||||
7
.github/utils/generate_openapi_specs.py
vendored
7
.github/utils/generate_openapi_specs.py
vendored
@ -6,6 +6,11 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
sys.path.append(".")
|
sys.path.append(".")
|
||||||
from rest_api.utils import get_openapi_specs, get_app, get_pipelines # pylint: disable=wrong-import-position
|
from rest_api.utils import get_openapi_specs, get_app, get_pipelines # pylint: disable=wrong-import-position
|
||||||
from haystack import __version__ # pylint: disable=wrong-import-position
|
from haystack import __version__ # pylint: disable=wrong-import-position
|
||||||
@ -17,7 +22,7 @@ DOCS_PATH = Path("./docs") / "_src" / "api" / "openapi"
|
|||||||
|
|
||||||
os.environ["PIPELINE_YAML_PATH"] = PIPELINE_PATH
|
os.environ["PIPELINE_YAML_PATH"] = PIPELINE_PATH
|
||||||
|
|
||||||
print(f"Loading OpenAPI specs from {APP_PATH} with pipeline at {PIPELINE_PATH}")
|
logging.info("Loading OpenAPI specs from %s with pipeline at %s", APP_PATH, PIPELINE_PATH)
|
||||||
|
|
||||||
# To initialize the app and the pipelines
|
# To initialize the app and the pipelines
|
||||||
get_app()
|
get_app()
|
||||||
|
|||||||
2
.gitignore
vendored
2
.gitignore
vendored
@ -150,6 +150,8 @@ saved_models
|
|||||||
*_build
|
*_build
|
||||||
rest_api/file-upload/*
|
rest_api/file-upload/*
|
||||||
**/feedback_squad_direct.json
|
**/feedback_squad_direct.json
|
||||||
|
haystack/json-schemas
|
||||||
|
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
|
||||||
# http cache (requests-cache)
|
# http cache (requests-cache)
|
||||||
|
|||||||
3
haystack/json-schemas/.gitignore
vendored
3
haystack/json-schemas/.gitignore
vendored
@ -1,3 +0,0 @@
|
|||||||
*
|
|
||||||
!.gitignore
|
|
||||||
!generate_schema.py
|
|
||||||
@ -1,23 +0,0 @@
|
|||||||
# Post-install hook script: generates the main pipeline JSON schema and then
# removes the `hatch_autorun_farm_haystack.pth` file from the `purelib` directory.
import os
import logging
import sysconfig
from pathlib import Path

from haystack.nodes._json_schema import update_json_schema

logger = logging.getLogger("hatch_autorun")

try:
    logger.warning(
        "Haystack is generating the YAML schema for Pipelines validation. This only happens once, after installing the package."
    )
    update_json_schema(main_only=True)

    # Destroy the hatch-autorun hook if it exists (needs to run just once after installation)
    try:
        os.remove(Path(sysconfig.get_paths()["purelib"]) / "hatch_autorun_farm_haystack.pth")
    except FileNotFoundError:
        # Nothing to remove: the hook file is already gone.
        pass

except Exception:
    # Best effort: a failure here is logged, never raised.
    # `logger.exception` attaches the active exception and its traceback by itself. Passing the
    # exception as an extra positional argument would make logging treat it as a %-format
    # argument for a message without placeholders, and the record would fail to format.
    logger.exception("Could not generate the Haystack Pipeline schemas.")
|
|
||||||
@ -1,5 +1,6 @@
|
|||||||
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
|
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
|
||||||
|
|
||||||
|
import os
|
||||||
import sys
|
import sys
|
||||||
import json
|
import json
|
||||||
import inspect
|
import inspect
|
||||||
@ -176,7 +177,7 @@ def create_schema_for_node_class(node_class: Type[BaseComponent]) -> Tuple[Dict[
|
|||||||
|
|
||||||
node_name = getattr(node_class, "__name__")
|
node_name = getattr(node_class, "__name__")
|
||||||
|
|
||||||
logger.info("Creating schema for '%s'", node_name)
|
logger.debug("Creating schema for '%s'", node_name)
|
||||||
|
|
||||||
# Read the relevant init parameters from __init__'s signature
|
# Read the relevant init parameters from __init__'s signature
|
||||||
init_method = getattr(node_class, "__init__", None)
|
init_method = getattr(node_class, "__init__", None)
|
||||||
@ -405,6 +406,26 @@ def inject_definition_in_schema(node_class: Type[BaseComponent], schema: Dict[st
|
|||||||
return schema
|
return schema
|
||||||
|
|
||||||
|
|
||||||
|
def load_schema() -> Dict[str, Any]:
    """
    Load the main Haystack pipeline JSON schema, generating it first if the file is missing.

    The schema is read from `JSON_SCHEMAS_PATH / "haystack-pipeline-main.schema.json"`. When that
    file does not exist, it is created by calling `update_json_schema(main_only=True)`.

    :return: the parsed content of the schema file.
    :raises Exception: any error raised by `update_json_schema` is propagated unchanged, after
        removing whatever schema file the failed generation may have left behind.
    """
    schema_file_path = JSON_SCHEMAS_PATH / "haystack-pipeline-main.schema.json"
    if not schema_file_path.exists():
        # Use the module-level logger, like the rest of this module, rather than the root logger.
        logger.info("Json schema not found, generating one at: %s", schema_file_path)
        try:
            update_json_schema(main_only=True)
        except Exception:
            # Do not leave an empty or partially written file behind: its mere presence
            # would make the next call skip the generation and try to parse it.
            if schema_file_path.exists():
                schema_file_path.unlink()
            # This error is not recoverable. A bare `raise` keeps the original traceback intact.
            raise

    with open(schema_file_path, "r") as schema_file:
        return json.load(schema_file)
|
||||||
|
|
||||||
|
|
||||||
def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH, main_only: bool = False):
|
def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH, main_only: bool = False):
|
||||||
"""
|
"""
|
||||||
Create (or update) a new schema.
|
Create (or update) a new schema.
|
||||||
@ -413,6 +434,7 @@ def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH, main_only: bo
|
|||||||
# commit from `main` or a release branch
|
# commit from `main` or a release branch
|
||||||
filename = f"haystack-pipeline-main.schema.json"
|
filename = f"haystack-pipeline-main.schema.json"
|
||||||
|
|
||||||
|
os.makedirs(destination_path, exist_ok=True)
|
||||||
with open(destination_path / filename, "w") as json_file:
|
with open(destination_path / filename, "w") as json_file:
|
||||||
json.dump(get_json_schema(filename=filename, version="ignore"), json_file, indent=2)
|
json.dump(get_json_schema(filename=filename, version="ignore"), json_file, indent=2)
|
||||||
|
|
||||||
|
|||||||
@ -14,7 +14,7 @@ from jsonschema.exceptions import ValidationError
|
|||||||
|
|
||||||
from haystack import __version__
|
from haystack import __version__
|
||||||
from haystack.nodes.base import BaseComponent, RootNode
|
from haystack.nodes.base import BaseComponent, RootNode
|
||||||
from haystack.nodes._json_schema import inject_definition_in_schema, JSON_SCHEMAS_PATH
|
from haystack.nodes._json_schema import load_schema, inject_definition_in_schema
|
||||||
from haystack.errors import PipelineError, PipelineConfigError, PipelineSchemaError
|
from haystack.errors import PipelineError, PipelineConfigError, PipelineSchemaError
|
||||||
|
|
||||||
|
|
||||||
@ -295,8 +295,8 @@ def validate_schema(pipeline_config: Dict, strict_version_check: bool = False, e
|
|||||||
"and fix your configuration accordingly."
|
"and fix your configuration accordingly."
|
||||||
)
|
)
|
||||||
|
|
||||||
with open(JSON_SCHEMAS_PATH / f"haystack-pipeline-main.schema.json", "r") as schema_file:
|
# Load the json schema, and create one if it doesn't exist yet
|
||||||
schema = json.load(schema_file)
|
schema = load_schema()
|
||||||
|
|
||||||
# Remove the version value from the schema to prevent validation errors on it - a version only has to be present.
|
# Remove the version value from the schema to prevent validation errors on it - a version only has to be present.
|
||||||
del schema["properties"]["version"]["const"]
|
del schema["properties"]["version"]["const"]
|
||||||
|
|||||||
@ -239,10 +239,6 @@ packages = [
|
|||||||
"haystack",
|
"haystack",
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.hatch.build.targets.wheel.hooks.autorun]
|
|
||||||
dependencies = ["hatch-autorun"]
|
|
||||||
file = "haystack/json-schemas/generate_schema.py"
|
|
||||||
|
|
||||||
[tool.black]
|
[tool.black]
|
||||||
line-length = 120
|
line-length = 120
|
||||||
skip_magic_trailing_comma = true # For compatibility with pydoc>=4.6, check if still needed.
|
skip_magic_trailing_comma = true # For compatibility with pydoc>=4.6, check if still needed.
|
||||||
|
|||||||
@ -42,7 +42,7 @@ def mock_json_schema(request, monkeypatch, tmp_path):
|
|||||||
lambda *a, **k: [(conftest, MockDocumentStore), (conftest, MockReader), (conftest, MockRetriever)],
|
lambda *a, **k: [(conftest, MockDocumentStore), (conftest, MockReader), (conftest, MockRetriever)],
|
||||||
)
|
)
|
||||||
# Point the JSON schema path to tmp_path
|
# Point the JSON schema path to tmp_path
|
||||||
monkeypatch.setattr(haystack.pipelines.config, "JSON_SCHEMAS_PATH", tmp_path)
|
monkeypatch.setattr(haystack.nodes._json_schema, "JSON_SCHEMAS_PATH", tmp_path)
|
||||||
|
|
||||||
# Generate mock schema in tmp_path
|
# Generate mock schema in tmp_path
|
||||||
filename = f"haystack-pipeline-main.schema.json"
|
filename = f"haystack-pipeline-main.schema.json"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user