haystack/docs-website/scripts/generate_requirements.py

#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
"""
Generate requirements.txt from Haystack's pyproject.toml for docs snippet testing.
This script fetches the pyproject.toml from a specific Haystack version or branch,
parses it, and generates a requirements.txt with all dependencies needed
to run the Python code snippets in the documentation.
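
Typical invocation (the flags are defined in main() below; adjust the path to
wherever this script lives in your checkout):

    python scripts/generate_requirements.py --version 2.16.1 --output requirements.txt
    python scripts/generate_requirements.py --version main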
"""
import argparse
import sys
from pathlib import Path
import requests
import toml

# Split on extras ("["), version operators (including bare ">" and "<"), and ";"
# (environment markers) so only the bare package name remains.
_VERSION_SPLITTERS = ("[", "==", ">=", ">", "<", "!=", "~=", ";")


def _package_name(dep: str) -> str:
    """Return the dependency name stripped of extras and version specifiers."""
    candidate = dep
    for splitter in _VERSION_SPLITTERS:
        candidate = candidate.split(splitter)[0]
    return candidate.strip()


def fetch_haystack_deps(version="main"):
    """
    Fetch and parse Haystack's pyproject.toml to extract dependencies.

    Args:
        version: Haystack version (e.g., "2.16.1", "main", "develop")
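
    Returns:
        A list of requirement strings: core, test, and e2e dependencies,
        deduplicated, with test-only tooling filtered out.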
"""
if version == "main":
url = "https://raw.githubusercontent.com/deepset-ai/haystack/refs/heads/main/pyproject.toml"
elif version == "develop":
url = "https://raw.githubusercontent.com/deepset-ai/haystack/refs/heads/develop/pyproject.toml"
else:
# Format version tag properly (add 'v' prefix if not present)
if not version.startswith("v"):
version = f"v{version}"
url = f"https://raw.githubusercontent.com/deepset-ai/haystack/refs/tags/{version}/pyproject.toml"

    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Failed to fetch pyproject.toml for version {version}: {e}")
        print(f"URL: {url}")
        sys.exit(1)

    try:
        config = toml.loads(response.text)
    except toml.TomlDecodeError as e:
        print(f"Failed to parse pyproject.toml: {e}")
        sys.exit(1)

    # Core project dependencies
    core_deps = config.get("project", {}).get("dependencies", [])

    # Test environment dependencies (most comprehensive for docs testing)
    test_env = config.get("tool", {}).get("hatch", {}).get("envs", {}).get("test", {})
    test_deps = test_env.get("dependencies", []) if test_env else []

    # E2E environment dependencies (additional components)
    e2e_env = config.get("tool", {}).get("hatch", {}).get("envs", {}).get("e2e", {})
    e2e_deps = e2e_env.get("dependencies", []) if e2e_env else []

    # Combine all dependencies
    all_deps = []
    all_deps.extend(core_deps)
    all_deps.extend(test_deps)
    all_deps.extend(e2e_deps)

    # Remove duplicates while preserving order
    seen = set()
    unique_deps = []
    for dep in all_deps:
        package_name = _package_name(dep)
        if package_name not in seen:
            seen.add(package_name)
            unique_deps.append(dep)
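    # Dedup is keyed on the bare package name, so the first occurrence wins:
    # core pins take precedence over test/e2e entries for the same package.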

    # Filter out test-only dependencies that aren't needed for docs
    test_only_packages = {
        "pytest",
        "pytest-bdd",
        "pytest-cov",
        "pytest-asyncio",
        "pytest-rerunfailures",
        "coverage",
        "mypy",
        "pylint",
        "ipython",
        "colorama",
    }
    filtered_deps = []
    for dep in unique_deps:
        package_name = _package_name(dep)
        if package_name.lower() not in test_only_packages:
            filtered_deps.append(dep)

    return filtered_deps


def main():
    """Entry point for generating requirements for docs snippet tests."""
    parser = argparse.ArgumentParser(
        description="Generate requirements.txt from Haystack's pyproject.toml for docs snippet testing"
    )
    parser.add_argument(
        "--version",
        "-v",
        default="main",
        help="Haystack version to fetch dependencies for (e.g., '2.16.1', 'main', 'develop'). Default: main",
    )
    parser.add_argument(
        "--output", "-o", default="requirements.txt", help="Output file path. Default: requirements.txt"
    )
    parser.add_argument("--verbose", action="store_true", help="Show detailed output including all dependencies")
    args = parser.parse_args()

    print(f"Fetching Haystack dependencies for version: {args.version}")
    deps = fetch_haystack_deps(args.version)

    requirements_content = f"""# Auto-generated from Haystack pyproject.toml (version: {args.version})
# For testing docs snippets
# Generated by scripts/generate_requirements.py
"""
    for dep in sorted(deps):
        requirements_content += f"{dep}\n"
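    # The written file is therefore the three header comments above followed by
    # the surviving requirement strings (copied verbatim from pyproject.toml),
    # one per line in sorted order.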

    requirements_path = Path(args.output)
    requirements_path.write_text(requirements_content)

    print(f"Generated {requirements_path} with {len(deps)} dependencies")
    if args.verbose:
        print("\nTop-level dependencies:")
        for dep in sorted(deps)[:10]:  # Show first 10
            print(f" {dep}")
        if len(deps) > 10:
            print(f" ... and {len(deps) - 10} more")


if __name__ == "__main__":
    main()