mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-10-09 23:17:21 +00:00
feat: utility function to explicitly invoke JSON schema generation (#3798)
* explicitly cache the JSON schema * fix import path * move to final
This commit is contained in:
parent
bebd6b26ec
commit
19c7725319
@ -34,6 +34,7 @@ RUN pip install --upgrade pip && \
|
||||
pip install --no-cache-dir .${haystack_extras} && \
|
||||
pip install --no-cache-dir ./rest_api
|
||||
|
||||
|
||||
FROM $base_immage AS final
|
||||
|
||||
COPY --from=build-image /opt/venv /opt/venv
|
||||
@ -43,7 +44,7 @@ RUN apt-get update && apt-get install -y libfontconfig && rm -rf /var/lib/apt/li
|
||||
|
||||
ENV PATH="/opt/venv/bin:$PATH"
|
||||
|
||||
# Importing Haystack will generate and persist the json schema, we do this here for two reasons:
|
||||
# The JSON schema is lazily generated at first usage, but we do it explicitly here for two reasons:
|
||||
# - the schema will already be there when the container runs, saving the generation overhead when a container starts
|
||||
# - derived images don't need to write the schema and can run with lower user privileges
|
||||
RUN python3 -c "import haystack"
|
||||
RUN python3 -c "from haystack.utils.docker import cache_schema; cache_schema()"
|
||||
|
@ -1,5 +1,6 @@
|
||||
import logging
|
||||
from typing import List, Union, Optional
|
||||
from haystack.nodes._json_schema import load_schema
|
||||
|
||||
|
||||
def cache_models(models: Optional[List[str]] = None, use_auth_token: Optional[Union[str, bool]] = None):
|
||||
@ -31,3 +32,15 @@ def cache_models(models: Optional[List[str]] = None, use_auth_token: Optional[Un
|
||||
logging.info("Caching %s", model_to_cache)
|
||||
transformers.AutoTokenizer.from_pretrained(model_to_cache, use_auth_token=use_auth_token)
|
||||
transformers.AutoModel.from_pretrained(model_to_cache, use_auth_token=use_auth_token)
|
||||
|
||||
|
||||
def cache_schema():
    """
    Build the Haystack JSON schema ahead of time and persist it.

    Haystack normally creates the schema on demand, the first time it is needed. Inside a Docker
    container that might not work if the user running Haystack has no write permissions on the
    Python installation, so this function is meant to be invoked while the image is being built,
    generating the schema once and for all.
    """
    # The call is made purely for its side effect: load_schema() generates the schema.
    load_schema()
|
||||
|
Loading…
x
Reference in New Issue
Block a user