2025-10-10 11:44:13 +02:00
|
|
|
#!/usr/bin/env python3
|
2025-10-10 12:04:48 +02:00
|
|
|
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
|
|
|
|
|
#
|
|
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
|
2025-10-10 11:44:13 +02:00
|
|
|
"""
|
|
|
|
|
Generate requirements.txt from Haystack's pyproject.toml for docs snippet testing.
|
|
|
|
|
|
|
|
|
|
This script fetches the pyproject.toml from a specific Haystack version or branch,
|
|
|
|
|
parses it, and generates a requirements.txt with all dependencies needed
|
|
|
|
|
to run the Python code snippets in the documentation.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
|
import sys
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
import toml
|
|
|
|
|
|
|
|
|
|
_VERSION_SPLITTERS = ("[", "==", ">=", "<", "!=", "~=")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _package_name(dep: str) -> str:
|
|
|
|
|
"""Return the dependency name stripped of extras and version specifiers."""
|
|
|
|
|
|
|
|
|
|
candidate = dep
|
|
|
|
|
for splitter in _VERSION_SPLITTERS:
|
|
|
|
|
candidate = candidate.split(splitter)[0]
|
|
|
|
|
return candidate.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _normalized_dep_key(dep):
    """Return a case- and separator-insensitive key for the package named in *dep*."""
    # Treat "-", "_" and "." as equivalent and ignore case so that e.g.
    # "PyYAML" / "pyyaml" or "pytest_cov" / "pytest-cov" compare equal.
    return _package_name(dep).lower().replace("_", "-").replace(".", "-")


def fetch_haystack_deps(version: str = "main"):
    """
    Fetch and parse Haystack's pyproject.toml to extract dependencies.

    The file is downloaded from the deepset-ai/haystack GitHub repository.
    "main" and "develop" are resolved as branches; any other value is treated
    as a release tag and gets a "v" prefix when it does not already have one.

    Dependencies are collected, in this order, from ``[project].dependencies``
    and from the ``dependencies`` / ``extra-dependencies`` lists of the Hatch
    ``test`` and ``e2e`` environments. Only the first requirement seen for a
    given package is kept, and test tooling (pytest, mypy, ...) is dropped.

    Args:
        version: Haystack version (e.g., "2.16.1", "main", "develop")

    Returns:
        A list of requirement strings, in first-seen order.

    Raises:
        SystemExit: With exit code 1 when the file cannot be downloaded or
            is not valid TOML. Details are printed to stderr.
    """
    if version in ("main", "develop"):
        ref = f"refs/heads/{version}"
    else:
        # Format version tag properly (add 'v' prefix if not present)
        if not version.startswith("v"):
            version = f"v{version}"
        ref = f"refs/tags/{version}"
    url = f"https://raw.githubusercontent.com/deepset-ai/haystack/{ref}/pyproject.toml"

    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        # Diagnostics go to stderr so they do not mix with regular output.
        print(f"Failed to fetch pyproject.toml for version {version}: {e}", file=sys.stderr)
        print(f"URL: {url}", file=sys.stderr)
        sys.exit(1)

    try:
        config = toml.loads(response.text)
    except toml.TomlDecodeError as e:
        print(f"Failed to parse pyproject.toml: {e}", file=sys.stderr)
        sys.exit(1)

    # Core project dependencies come first so they win during de-duplication.
    all_deps = list(config.get("project", {}).get("dependencies", []))

    # Test environment (most comprehensive for docs testing), then the E2E
    # environment (additional components). Hatch environments may declare
    # requirements under "dependencies" and/or "extra-dependencies".
    hatch_envs = config.get("tool", {}).get("hatch", {}).get("envs", {})
    for env_name in ("test", "e2e"):
        env = hatch_envs.get(env_name) or {}
        all_deps.extend(env.get("dependencies", []))
        all_deps.extend(env.get("extra-dependencies", []))

    # Test-only dependencies that aren't needed for docs (normalized names).
    test_only_packages = {
        "pytest",
        "pytest-bdd",
        "pytest-cov",
        "pytest-asyncio",
        "pytest-rerunfailures",
        "coverage",
        "mypy",
        "pylint",
        "ipython",
        "colorama",
    }

    # Remove duplicates while preserving order, then drop test-only packages.
    # The same normalized key is used for both checks so they cannot disagree.
    seen = set()
    filtered_deps = []
    for dep in all_deps:
        key = _normalized_dep_key(dep)
        if key in seen:
            continue
        seen.add(key)
        if key not in test_only_packages:
            filtered_deps.append(dep)

    return filtered_deps
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Entry point for generating requirements for docs snippet tests.

    Parses the command line, fetches the dependency list for the requested
    Haystack version, writes it (sorted, one requirement per line, preceded
    by a short comment header) to the output path, and prints a summary.
    With ``--verbose`` the first ten requirements are printed as well.
    """
    parser = argparse.ArgumentParser(
        description="Generate requirements.txt from Haystack's pyproject.toml for docs snippet testing"
    )
    parser.add_argument(
        "--version",
        "-v",
        default="main",
        help="Haystack version to fetch dependencies for (e.g., '2.16.1', 'main', 'develop'). Default: main",
    )
    parser.add_argument(
        "--output", "-o", default="requirements.txt", help="Output file path. Default: requirements.txt"
    )
    parser.add_argument("--verbose", action="store_true", help="Show detailed output including all dependencies")

    args = parser.parse_args()

    print(f"Fetching Haystack dependencies for version: {args.version}")
    deps = fetch_haystack_deps(args.version)
    # Sort once; the same ordering is used for the file and the verbose preview.
    sorted_deps = sorted(deps)

    header_lines = [
        f"# Auto-generated from Haystack pyproject.toml (version: {args.version})",
        "# For testing docs snippets",
        "# Generated by scripts/generate_requirements.py",
    ]
    requirements_content = "".join(f"{line}\n" for line in header_lines + sorted_deps)

    requirements_path = Path(args.output)
    # Explicit encoding keeps the generated file independent of the platform locale.
    requirements_path.write_text(requirements_content, encoding="utf-8")

    print(f"Generated {requirements_path} with {len(deps)} dependencies")

    if args.verbose:
        print("\nTop-level dependencies:")
        for dep in sorted_deps[:10]:  # Show first 10
            print(f" {dep}")
        if len(deps) > 10:
            print(f" ... and {len(deps) - 10} more")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run the command-line interface only when this file is executed directly,
# so importing the module has no side effects.
if __name__ == "__main__":
    main()
|