# Loader section: one loader entry that names a search path and the modules
# to pick up from it.
# NOTE(review): nesting was reconstructed from a flattened listing whose
# indentation was lost (revision timestamps and "|" gutter lines were
# interleaved with the content) -- confirm against the consuming tool's schema.
loaders:
  - type: loaders.CustomPythonLoader
    # Relative path; presumably resolved from this config's directory -- TODO confirm.
    search_path: [../../../haystack/nodes/file_converter]
    # Module names to load from search_path, one per line for clean diffs.
    modules:
      - "csv"
      - "docx"
      - "image"
      - "markdown"
      - "pdf"
      - "parsr"
      - "azure"
      - "tika"
      - "txt"
      - "json"
    # Presumably skips package initializers during discovery -- verify with the loader.
    ignore_when_discovered: ["__init__"]
# Processor section: three entries, listed as filter, smart, crossref.
# NOTE(review): nesting was reconstructed from a flattened listing whose
# indentation was lost -- the four option keys are assumed to belong to the
# `filter` entry; confirm against the consuming tool's schema.
processors:
  - type: filter
    # Explicit null instead of a bare `expression:` (same parsed value,
    # but unambiguous to readers and linters).
    expression: null
    documented_only: true
    do_not_filter_modules: false
    skip_empty_modules: true
  - type: smart
  - type: crossref
# Renderer section: page metadata (excerpt, category, title, slug, order)
# plus Markdown output options.
# NOTE(review): nesting was reconstructed from a flattened listing whose
# indentation was lost -- the keys from `descriptive_class_title` through
# `filename` are assumed to sit under `markdown:`; confirm against the
# consuming tool's schema.
renderer:
  type: renderers.ReadmeRenderer
  # Grammar fix in the published text: "cast" -> "casts".
  excerpt: Extracts text from files in different formats and casts it into the unified Document format.
  category_slug: haystack-classes
  title: File Converters API
  slug: file-converters-api
  order: 70
  markdown:
    descriptive_class_title: false
    descriptive_module_title: true
    add_method_class_prefix: true
    add_member_class_prefix: false
    filename: file_converters_api.md