mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-07-11 19:11:19 +00:00

* first functioning DocxFileToDocument * fix lazy import message * add reno * Add license header Co-authored-by: Sebastian Husch Lee <sjrl@users.noreply.github.com> * change DocxFileToDocument to DocxToDocument * Update library install to the maintained version Co-authored-by: Sebastian Husch Lee <sjrl@users.noreply.github.com> * clean try-except to only take non-haystack errors into account * Add warning on docstring of component ignoring page breaks, mark test as skip * make warnings lazy evaluations Co-authored-by: Sebastian Husch Lee <sjrl@users.noreply.github.com> * make warnings lazy evaluations Co-authored-by: Sebastian Husch Lee <sjrl@users.noreply.github.com> * Make warnings lazy evaluated Co-authored-by: Sebastian Husch Lee <sjrl@users.noreply.github.com> * Solve f bug * Get more metadata from docx files * add 'python-docx' dependency and docs * Change logging import Co-authored-by: Sebastian Husch Lee <sjrl@users.noreply.github.com> * Fix typo Co-authored-by: Sebastian Husch Lee <sjrl@users.noreply.github.com> * remake metadata extraction for docx * solve bug regarding _get_docx_metadata method * Update haystack/components/converters/docx.py Co-authored-by: Sebastian Husch Lee <sjrl@users.noreply.github.com> * Update haystack/components/converters/docx.py Co-authored-by: Sebastian Husch Lee <sjrl@users.noreply.github.com> * Delete unused test --------- Co-authored-by: Sebastian Husch Lee <sjrl@users.noreply.github.com>
41 lines
1014 B
YAML
41 lines
1014 B
YAML
# API-docs build configuration for the "Converters" page.
# Three top-level sections: loaders, processors, renderer.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# confirm against the pydoc-markdown schema before relying on it.

loaders:
  - type: haystack_pydoc_tools.loaders.CustomPythonLoader
    # Path is relative; presumably resolved from this config file's directory.
    search_path: [../../../haystack/components/converters]
    # Module names to load from search_path.
    modules:
      - azure
      - html
      - markdown
      - pdfminer
      - pypdf
      - pptx
      - tika
      - txt
      - output_adapter
      - openapi_functions
      - docx
    ignore_when_discovered: ["__init__"]

processors:
  - type: filter
    # No filter expression is set; written as an explicit null rather than a
    # bare empty value so the intent is unambiguous.
    expression: null
    documented_only: true
    do_not_filter_modules: false
    skip_empty_modules: true
  - type: smart
  - type: crossref

renderer:
  type: haystack_pydoc_tools.renderers.ReadmeCoreRenderer
  excerpt: Various converters to transform data from one format to another.
  category_slug: haystack-api
  title: Converters
  slug: converters-api
  order: 20
  # Options for the generated Markdown output.
  # NOTE(review): `filename` is assumed to belong under `markdown` — confirm.
  markdown:
    descriptive_class_title: false
    classdef_code_block: false
    descriptive_module_title: true
    add_method_class_prefix: true
    add_member_class_prefix: false
    filename: converters_api.md