Docs: Csvconverter docstrings update (#3974)

* Add missing docstrings

* Blackify

* Update haystack/nodes/file_converter/csv.py

Co-authored-by: Sebastian <sjrl@users.noreply.github.com>

* mark some fields as unused

Co-authored-by: ZanSara <sara.zanzottera@deepset.ai>
Co-authored-by: Sebastian <sjrl@users.noreply.github.com>
Co-authored-by: ZanSara <sarazanzo94@gmail.com>
This commit is contained in:
Agnieszka Marzec 2023-01-27 12:10:46 +01:00 committed by GitHub
parent 7a36ccf3e2
commit 95668df92c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -14,7 +14,7 @@ logger = logging.getLogger(__name__)
class CsvTextConverter(BaseConverter):
"""
Converts Question & Answers CSV files to text Documents.
Converts a CSV file containing FAQs to text Documents. The CSV file must have two columns: 'question' and 'answer'. Use this node for FAQ-style question answering.
"""
outgoing_edges = 1
@@ -29,11 +29,19 @@ class CsvTextConverter(BaseConverter):
id_hash_keys: Optional[List[str]] = None,
) -> List[Document]:
"""
Load CVS file and convert it to documents.
Load a CSV file containing question-answer pairs and convert it to Documents.
:param file_path: Path to a CSV file containing two columns.
The first will be interpreted as a question, the second as content.
:returns: List of document, 1 document per line in the CSV.
:param file_path: Path to the CSV file you want to convert. The file must have two columns called 'question' and 'answer'.
The first will be interpreted as a question, the second as content.
:param meta: A dictionary of metadata key-value pairs that you want to append to the returned document. It's optional.
:param encoding: Specifies the file encoding. It's optional. The default value is `UTF-8`.
:param id_hash_keys: Generates the document ID from a custom list of strings that refer to the document's
attributes. To ensure you don't have duplicate documents in your DocumentStore when texts are
not unique, modify the metadata and pass, for example, "meta" to this field (example: ["content", "meta"]).
Then the ID is generated by using the content and the metadata you defined.
:param remove_numeric_tables: unused
:param valid_languages: unused
:returns: List of Documents, one Document per line in the CSV.
"""
if not isinstance(file_path, list):
file_path = [file_path]