Deprecate Milvus1DocumentStore (#2495)

* Add warning message

* Update doc string

* Update Documentation & Code Style

* Change DeprecationWarning to FutureWarning

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
bogdankostic 2022-05-04 15:09:57 +02:00 committed by GitHub
parent 970c476615
commit a4e603ce87
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 19 additions and 6 deletions

View File

@ -2569,6 +2569,10 @@ Usage:
def __init__(sql_url: str = "sqlite:///", milvus_url: str = "tcp://localhost:19530", connection_pool: str = "SingletonThread", index: str = "document", vector_dim: int = None, embedding_dim: int = 768, index_file_size: int = 1024, similarity: str = "dot_product", index_type: IndexType = IndexType.FLAT, index_param: Optional[Dict[str, Any]] = None, search_param: Optional[Dict[str, Any]] = None, return_embedding: bool = False, embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = "overwrite", isolation_level: str = None) def __init__(sql_url: str = "sqlite:///", milvus_url: str = "tcp://localhost:19530", connection_pool: str = "SingletonThread", index: str = "document", vector_dim: int = None, embedding_dim: int = 768, index_file_size: int = 1024, similarity: str = "dot_product", index_type: IndexType = IndexType.FLAT, index_param: Optional[Dict[str, Any]] = None, search_param: Optional[Dict[str, Any]] = None, return_embedding: bool = False, embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = "overwrite", isolation_level: str = None)
``` ```
**WARNING:** Milvus1DocumentStore is deprecated and will be removed in a future version. Please switch to
Milvus2DocumentStore or consider using another DocumentStore.
**Arguments**: **Arguments**:
- `sql_url`: SQL connection URL for storing document texts and metadata. It defaults to a local, file based SQLite DB. For large scale - `sql_url`: SQL connection URL for storing document texts and metadata. It defaults to a local, file based SQLite DB. For large scale
@ -4436,9 +4440,9 @@ deployment, Postgres is recommended.
- `embedding_dim`: The embedding vector size. - `embedding_dim`: The embedding vector size.
- `return_embedding`: Whether to return document embeddings. - `return_embedding`: Whether to return document embeddings.
- `index`: Name of index in document store to use. - `index`: Name of index in document store to use.
- `similarity`: The similarity function used to compare document vectors. `"dot_product"` is the default - `similarity`: The similarity function used to compare document vectors. `"cosine"` is the default
since it is more performant with DPR embeddings. `"cosine"` is recommended if you are using a and is recommended if you are using a Sentence-Transformer model. `"dot_product"` is more performant
Sentence-Transformer model. with DPR embeddings.
In both cases, the returned values in Document.score are normalized to be in range [0,1]: In both cases, the returned values in Document.score are normalized to be in range [0,1]:
- For `"dot_product"`: `expit(np.asarray(raw_score / 100))` - For `"dot_product"`: `expit(np.asarray(raw_score / 100))`
- For `"cosine"`: `(raw_score + 1) / 2` - For `"cosine"`: `(raw_score + 1) / 2`

View File

@ -26,10 +26,10 @@ This tutorial will show you how to integrate a classification model into your pr
# Install the latest master of Haystack # Install the latest master of Haystack
!pip install --upgrade pip !pip install --upgrade pip
!pip install git+https://github.com/deepset-ai/haystack.git#egg=farm-haystack[colab, ocr] !pip install git+https://github.com/deepset-ai/haystack.git#egg=farm-haystack[colab,ocr]
!wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.03.tar.gz !wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz
!tar -xvf xpdf-tools-linux-4.03.tar.gz && sudo cp xpdf-tools-linux-4.03/bin64/pdftotext /usr/local/bin !tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin
# Install pygraphviz # Install pygraphviz
!apt install libgraphviz-dev !apt install libgraphviz-dev

View File

@ -62,6 +62,9 @@ class Milvus1DocumentStore(SQLDocumentStore):
isolation_level: str = None, isolation_level: str = None,
): ):
""" """
**WARNING:** Milvus1DocumentStore is deprecated and will be removed in a future version. Please switch to
Milvus2DocumentStore or consider using another DocumentStore.
:param sql_url: SQL connection URL for storing document texts and metadata. It defaults to a local, file based SQLite DB. For large scale :param sql_url: SQL connection URL for storing document texts and metadata. It defaults to a local, file based SQLite DB. For large scale
deployment, Postgres is recommended. If using MySQL then same server can also be used for deployment, Postgres is recommended. If using MySQL then same server can also be used for
Milvus metadata. For more details see https://milvus.io/docs/v1.0.0/data_manage.md. Milvus metadata. For more details see https://milvus.io/docs/v1.0.0/data_manage.md.
@ -105,6 +108,12 @@ class Milvus1DocumentStore(SQLDocumentStore):
exists. exists.
:param isolation_level: see SQLAlchemy's `isolation_level` parameter for `create_engine()` (https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.isolation_level) :param isolation_level: see SQLAlchemy's `isolation_level` parameter for `create_engine()` (https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.isolation_level)
""" """
deprecation_message = (
"Milvus1DocumentStore is deprecated and will be removed in a future version. "
"Please consider switching to Milvus2 or to another DocumentStore."
)
warnings.warn(message=deprecation_message, category=FutureWarning, stacklevel=3)
super().__init__( super().__init__(
url=sql_url, index=index, duplicate_documents=duplicate_documents, isolation_level=isolation_level url=sql_url, index=index, duplicate_documents=duplicate_documents, isolation_level=isolation_level
) )