update docstrings (#8142)

2026-01-08 13:06:29 +00:00 · 2024-08-01 16:27:30 +02:00 · 2024-08-01 16:27:30 +02:00 · ffbaed85de
commit ffbaed85de
parent bec822c361
1 changed files with 9 additions and 5 deletions
--- a/haystack/components/fetchers/link_content.py
+++ b/haystack/components/fetchers/link_content.py
@@ -52,12 +52,16 @@ def _binary_content_handler(response: Response) -> ByteStream:
@component
 class LinkContentFetcher:
    """
-    LinkContentFetcher is a component for fetching and extracting content from URLs.
+    Fetches and extracts content from URLs.

-    It supports handling various content types, retries on failures, and automatic user-agent rotation for failed web
-    requests.
+    It supports various content types, retries on failures, and automatic user-agent rotation for failed web
+    requests. Use it as the data-fetching step in your pipelines.
+
+    You may need to convert LinkContentFetcher's output into a list of documents. Use the HTMLToDocument
+    converter to do this.
+
+    ### Usage example

-    Usage example:
    ```python
    from haystack.components.fetchers.link_content import LinkContentFetcher

@@ -84,7 +88,7 @@ class LinkContentFetcher:
            For multiple URLs, it logs errors and returns the content it successfully fetched.
        :param user_agents: [User agents](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent)
            for fetching content. If `None`, a default user agent is used.
-        :param retry_attempts: Specifies how many times you want it to retry to fetch the URL's content.
+        :param retry_attempts: The number of times to retry fetching the URL's content.
        :param timeout: Timeout in seconds for the request.
        """
        self.raise_on_failure = raise_on_failure