#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2022-present deepset GmbH
#
# SPDX-License-Identifier: Apache-2.0

"""
Generate requirements.txt from Haystack's pyproject.toml for docs snippet testing.

This script fetches the pyproject.toml from a specific Haystack version or branch,
parses it, and generates a requirements.txt with all dependencies needed to run
the Python code snippets in the documentation.
"""

import argparse
import re
import sys
from pathlib import Path

import requests
import toml

# Tokens that can follow the distribution name in a requirement string:
# extras ("["), legacy parenthesised specifiers ("("), version operators,
# environment markers (";") and direct references ("@"). None of them can
# occur inside a valid distribution name, so cutting at each is safe.
_VERSION_SPLITTERS = ("[", "(", "==", ">=", "<=", "<", ">", "!=", "~=", ";", "@")

# Runs of "-", "_" and "." are equivalent in distribution names.
_NAME_SEPARATORS = re.compile(r"[-_.]+")

_RAW_URL_BASE = "https://raw.githubusercontent.com/deepset-ai/haystack/refs"

# Version arguments that are resolved as branches instead of release tags.
_BRANCHES = ("main", "develop")

# Tooling that is only needed to run the test suite, not the docs snippets.
# Entries are stored in normalised form (lowercase, "-" as separator).
_TEST_ONLY_PACKAGES = frozenset(
    {
        "pytest",
        "pytest-bdd",
        "pytest-cov",
        "pytest-asyncio",
        "pytest-rerunfailures",
        "coverage",
        "mypy",
        "pylint",
        "ipython",
        "colorama",
    }
)


def _package_name(dep: str) -> str:
    """Return the dependency name stripped of extras, version specifiers and markers."""
    candidate = dep
    for splitter in _VERSION_SPLITTERS:
        candidate = candidate.partition(splitter)[0]
    return candidate.strip()


def _canonical_name(dep: str) -> str:
    """Return the normalised (lowercase, dash-separated) package name of a requirement."""
    return _NAME_SEPARATORS.sub("-", _package_name(dep)).lower()


def fetch_haystack_deps(version: str = "main") -> list[str]:
    """
    Fetch and parse Haystack's pyproject.toml to extract dependencies.

    The core project dependencies are combined with the dependencies of the
    Hatch ``test`` and ``e2e`` environments. Duplicates (compared by normalised
    package name) are dropped keeping the first occurrence, and test-only
    tooling is removed.

    Args:
        version: Haystack version (e.g., "2.16.1", "main", "develop").
            "main" and "develop" are fetched from the branch of that name;
            anything else is treated as a release tag and gets a "v" prefix
            if it does not already have one.

    Returns:
        The requirement strings exactly as written in pyproject.toml, in
        order of first appearance.

    Raises:
        SystemExit: With exit code 1 if the file cannot be downloaded or is
            not valid TOML. The reason is printed to stderr.
    """
    if version in _BRANCHES:
        url = f"{_RAW_URL_BASE}/heads/{version}/pyproject.toml"
    else:
        # Format version tag properly (add 'v' prefix if not present)
        if not version.startswith("v"):
            version = f"v{version}"
        url = f"{_RAW_URL_BASE}/tags/{version}/pyproject.toml"

    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Failed to fetch pyproject.toml for version {version}: {e}", file=sys.stderr)
        print(f"URL: {url}", file=sys.stderr)
        sys.exit(1)

    try:
        config = toml.loads(response.text)
    except toml.TomlDecodeError as e:
        print(f"Failed to parse pyproject.toml: {e}", file=sys.stderr)
        sys.exit(1)

    hatch_envs = config.get("tool", {}).get("hatch", {}).get("envs", {})
    candidates = [
        # Core project dependencies
        *config.get("project", {}).get("dependencies", []),
        # Test environment dependencies (most comprehensive for docs testing)
        *hatch_envs.get("test", {}).get("dependencies", []),
        # E2E environment dependencies (additional components)
        *hatch_envs.get("e2e", {}).get("dependencies", []),
    ]

    seen: set[str] = set()
    kept: list[str] = []
    for dep in candidates:
        name = _canonical_name(dep)
        if name in seen:
            continue
        seen.add(name)
        if name not in _TEST_ONLY_PACKAGES:
            kept.append(dep)
    return kept


def main():
    """Entry point for generating requirements for docs snippet tests."""
    parser = argparse.ArgumentParser(
        description="Generate requirements.txt from Haystack's pyproject.toml for docs snippet testing"
    )
    parser.add_argument(
        "--version",
        "-v",
        default="main",
        help="Haystack version to fetch dependencies for (e.g., '2.16.1', 'main', 'develop'). Default: main",
    )
    parser.add_argument(
        "--output", "-o", default="requirements.txt", help="Output file path. Default: requirements.txt"
    )
    parser.add_argument("--verbose", action="store_true", help="Show detailed output including all dependencies")

    args = parser.parse_args()

    print(f"Fetching Haystack dependencies for version: {args.version}")

    # Sort once; the same ordering is used for the file and the verbose preview.
    deps = sorted(fetch_haystack_deps(args.version))

    header = f"""# Auto-generated from Haystack pyproject.toml (version: {args.version})
# For testing docs snippets
# Generated by scripts/generate_requirements.py

"""
    requirements_content = header + "".join(f"{dep}\n" for dep in deps)

    requirements_path = Path(args.output)
    # Explicit encoding so the output does not depend on the platform locale.
    requirements_path.write_text(requirements_content, encoding="utf-8")

    print(f"Generated {requirements_path} with {len(deps)} dependencies")

    if args.verbose:
        print("\nTop-level dependencies:")
        for dep in deps[:10]:  # Show first 10
            print(f"  {dep}")
        if len(deps) > 10:
            print(f"  ... and {len(deps) - 10} more")


if __name__ == "__main__":
    main()