mirror of
				https://github.com/Unstructured-IO/unstructured.git
				synced 2025-10-31 01:54:25 +00:00 
			
		
		
		
	fix documentation html links example (#2608)
Closes #2577

Testing:

```
from unstructured.partition.html import partition_html

cnn_lite_url = "https://lite.cnn.com/"
elements = partition_html(url=cnn_lite_url)
links = []

for element in elements:
    if element.metadata.link_urls:
        relative_link = element.metadata.link_urls[0][1:]
        if relative_link.startswith("2024"):
            links.append(f"{cnn_lite_url}{relative_link}")

print(links)
```

---------

Co-authored-by: ron-unstructured <ronny@unstructured.io>
Co-authored-by: Ronny H <138828701+ron-unstructured@users.noreply.github.com>
This commit is contained in:
		
							parent
							
								
									b9aa4b7452
								
							
						
					
					
						commit
						3783b44d0b
					
				| @ -4,7 +4,7 @@ | |||||||
| # | # | ||||||
| #    pip-compile --output-file=build.txt build.in | #    pip-compile --output-file=build.txt build.in | ||||||
| # | # | ||||||
| alabaster==0.7.16 | alabaster==0.7.13 | ||||||
|     # via sphinx |     # via sphinx | ||||||
| babel==2.14.0 | babel==2.14.0 | ||||||
|     # via sphinx |     # via sphinx | ||||||
|  | |||||||
| @ -20,9 +20,9 @@ First, we gather links from the CNN Lite homepage using the `partition_html` fun | |||||||
|     links = [] |     links = [] | ||||||
| 
 | 
 | ||||||
|     for element in elements: |     for element in elements: | ||||||
|         if element.metadata.links is not None: |         if element.metadata.link_urls: | ||||||
|             relative_link = element.metadata.links[0]["url"][1:] |             relative_link = element.metadata.link_urls[0][1:] | ||||||
|             if relative_link.startswith("2023"): |             if relative_link.startswith("2024"): | ||||||
|                 links.append(f"{cnn_lite_url}{relative_link}") |                 links.append(f"{cnn_lite_url}{relative_link}") | ||||||
| 
 | 
 | ||||||
| Ingest Individual Articles with UnstructuredURLLoader | Ingest Individual Articles with UnstructuredURLLoader | ||||||
|  | |||||||
| @ -4,7 +4,7 @@ | |||||||
| # | # | ||||||
| #    pip-compile --output-file=build.txt build.in | #    pip-compile --output-file=build.txt build.in | ||||||
| # | # | ||||||
| alabaster==0.7.16 | alabaster==0.7.13 | ||||||
|     # via sphinx |     # via sphinx | ||||||
| babel==2.14.0 | babel==2.14.0 | ||||||
|     # via sphinx |     # via sphinx | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 John
						John