datahub/.github/scripts/compare_test_weights.py

#!/usr/bin/env python3
"""
Compare test weights and generate PR description with change analysis.
This script compares old and new test weight files to:
1. Calculate percentage changes in total time
2. Identify tests with significant duration changes (>10% or >10s)
3. Find new and removed tests
4. Generate recommendations for batch count adjustments if total time changes >20%
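
Example invocation (file names below are illustrative, not checked in):
    python compare_test_weights.py \
        --old-cypress old-cypress-weights.json --new-cypress new-cypress-weights.json \
        --old-pytest old-pytest-weights.json --new-pytest new-pytest-weights.json \
        --output pr_body.md --threshold 5.0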
"""
import argparse
import json
import sys
from pathlib import Path
from typing import Dict, List, Tuple


def load_weights(file_path: str, test_id_key: str) -> Dict[str, float]:
"""Load test weights from JSON file into a dict."""
if not Path(file_path).exists():
return {}
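    # Expected input (inferred from the parsing below): a JSON array of objects,
    # each with the test-id key (e.g. "filePath" or "testId") and a "duration"
    # string such as "12.3s".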
with open(file_path) as f:
data = json.load(f)
return {
item[test_id_key]: float(item['duration'].rstrip('s'))
for item in data
}


def calculate_changes(old_weights: Dict[str, float], new_weights: Dict[str, float]) -> Dict:
"""Calculate comprehensive change statistics."""
# New and removed tests (calculate first to exclude from significant changes)
new_test_ids = set(new_weights.keys()) - set(old_weights.keys())
removed_test_ids = set(old_weights.keys()) - set(new_weights.keys())
new_tests = {test: new_weights[test] for test in new_test_ids}
removed_tests = {test: old_weights[test] for test in removed_test_ids}
# Overall stats
old_total = sum(old_weights.values())
new_total = sum(new_weights.values())
total_change_pct = ((new_total - old_total) / old_total * 100) if old_total > 0 else 0
# Individual test changes (ONLY for tests that exist in both old and new)
significant_changes = []
for test_id, new_time in new_weights.items():
# Skip new tests - they shouldn't appear in significant changes
if test_id in old_weights:
old_time = old_weights[test_id]
diff = new_time - old_time
pct_change = (diff / old_time * 100) if old_time > 0 else 0
            # Flag tests that changed by more than 10% or more than 10 seconds,
            # but only when the test has a meaningful duration (>=5s before or after)
            # to filter out noise from very fast tests
            if (abs(pct_change) > 10 or abs(diff) > 10) and (old_time >= 5.0 or new_time >= 5.0):
significant_changes.append({
'test': test_id,
'old': old_time,
'new': new_time,
'diff': diff,
'pct': pct_change
})
# Sort by absolute percentage change
significant_changes.sort(key=lambda x: abs(x['pct']), reverse=True)
return {
'old_total': old_total,
'new_total': new_total,
'total_change_pct': total_change_pct,
'old_count': len(old_weights),
'new_count': len(new_weights),
'significant_changes': significant_changes,
'new_tests': new_tests,
'removed_tests': removed_tests,
'new_tests_total': sum(new_tests.values()),
'removed_tests_total': sum(removed_tests.values())
}


def generate_pr_body(cypress_changes: Dict, pytest_changes: Dict) -> str:
"""Generate markdown PR body with change analysis."""
lines = []
lines.append("## 🤖 Automated Test Weight Update")
lines.append("")
lines.append("This PR updates test weights based on recent CI runs to improve batch balancing.")
lines.append("")
# Overall summary
lines.append("## 📊 Summary")
lines.append("")
lines.append("| Test Type | Old Total | New Total | Change | # Tests |")
lines.append("|-----------|-----------|-----------|--------|---------|")
for name, changes in [("Cypress", cypress_changes), ("Pytest", pytest_changes)]:
old_min = changes['old_total'] / 60
new_min = changes['new_total'] / 60
change_sign = "+" if changes['total_change_pct'] > 0 else ""
lines.append(
f"| {name} | {old_min:.1f} min | {new_min:.1f} min | "
f"{change_sign}{changes['total_change_pct']:.1f}% | "
f"{changes['old_count']}{changes['new_count']} |"
)
lines.append("")
# Warnings for large changes
warnings = []
for name, changes in [("Cypress", cypress_changes), ("Pytest", pytest_changes)]:
if abs(changes['total_change_pct']) > 20:
warnings.append(
f"⚠️ **{name} total time changed by {changes['total_change_pct']:+.1f}%** - "
f"Consider reviewing batch count configuration!"
)
if warnings:
lines.append("## ⚠️ Warnings")
lines.append("")
for warning in warnings:
lines.append(warning)
lines.append("")
lines.append("<details>")
lines.append("<summary>Batch count recommendations</summary>")
lines.append("")
lines.append("Current configuration:")
lines.append("- Cypress: 11 batches (Depot) / 5 batches (GitHub runners)")
lines.append("- Pytest: 6 batches (Depot) / 3 batches (GitHub runners)")
lines.append("")
lines.append("If total time increased >20%, consider increasing batch count to maintain CI speed.")
lines.append("If total time decreased >20%, consider decreasing batch count to save runner costs.")
lines.append("")
lines.append("Update batch counts in `.github/workflows/docker-unified.yml`")
lines.append("</details>")
lines.append("")
# Significant changes
def format_significant_changes(changes: Dict, name: str, max_display: int = 15):
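        """Render a collapsible markdown section listing tests whose duration changed significantly."""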
if not changes['significant_changes']:
return []
section = []
section.append(f"## 🔍 {name} - Significant Changes (>10% or >10s)")
section.append("")
section.append("<details>")
section.append(f"<summary>{len(changes['significant_changes'])} tests with significant duration changes</summary>")
section.append("")
for item in changes['significant_changes'][:max_display]:
sign = "+" if item['diff'] > 0 else ""
emoji = "🔴" if item['diff'] > 0 else "🟢"
section.append(f"**{emoji} `{item['test']}`**")
section.append(f"- Old: {item['old']:.1f}s → New: {item['new']:.1f}s ({sign}{item['diff']:.1f}s, {sign}{item['pct']:.1f}%)")
section.append("")
if len(changes['significant_changes']) > max_display:
section.append(f"*... and {len(changes['significant_changes']) - max_display} more*")
section.append("")
section.append("</details>")
section.append("")
return section
lines.extend(format_significant_changes(cypress_changes, "Cypress"))
lines.extend(format_significant_changes(pytest_changes, "Pytest"))
# New and removed tests - show summary instead of listing all
def format_test_changes(changes: Dict, name: str):
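        """Render collapsible markdown sections summarizing added and removed tests."""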
new_tests = changes['new_tests']
removed_tests = changes['removed_tests']
new_tests_total = changes['new_tests_total']
removed_tests_total = changes['removed_tests_total']
if not new_tests and not removed_tests:
return []
section = []
section.append(f"## ✨ {name} - Test Changes")
section.append("")
if new_tests:
section.append(f"** Added: {len(new_tests)} tests** ({new_tests_total/60:.1f} min total)")
section.append("<details>")
section.append("<summary>View new tests</summary>")
section.append("")
            for test, duration in sorted(new_tests.items())[:20]:
section.append(f"- `{test}`: {duration:.1f}s")
if len(new_tests) > 20:
section.append(f"- *... and {len(new_tests) - 20} more*")
section.append("")
section.append("</details>")
section.append("")
if removed_tests:
section.append(f"** Removed: {len(removed_tests)} tests** ({removed_tests_total/60:.1f} min total)")
section.append("<details>")
section.append("<summary>View removed tests</summary>")
section.append("")
            for test, duration in sorted(removed_tests.items())[:20]:
section.append(f"- `{test}`: {duration:.1f}s")
if len(removed_tests) > 20:
section.append(f"- *... and {len(removed_tests) - 20} more*")
section.append("")
section.append("</details>")
section.append("")
return section
lines.extend(format_test_changes(cypress_changes, "Cypress"))
lines.extend(format_test_changes(pytest_changes, "Pytest"))
# Footer
lines.append("---")
lines.append("")
lines.append("*Generated by automated test weight update workflow*")
lines.append("")
return "\n".join(lines)


def main():
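    """Parse CLI arguments, compare old and new weights, and write a PR body when changes exceed the threshold."""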
parser = argparse.ArgumentParser(description="Compare test weights and generate PR description")
parser.add_argument("--old-cypress", required=True, help="Path to old Cypress weights JSON")
parser.add_argument("--new-cypress", required=True, help="Path to new Cypress weights JSON")
parser.add_argument("--old-pytest", required=True, help="Path to old Pytest weights JSON")
parser.add_argument("--new-pytest", required=True, help="Path to new Pytest weights JSON")
parser.add_argument("--output", required=True, help="Output file for PR body markdown")
parser.add_argument("--threshold", type=float, default=5.0,
help="Minimum total change percentage to trigger PR (default: 5.0)")
args = parser.parse_args()
# Load weights
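    # Cypress weights are keyed by spec file path, pytest weights by test ID.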
old_cypress = load_weights(args.old_cypress, 'filePath')
new_cypress = load_weights(args.new_cypress, 'filePath')
old_pytest = load_weights(args.old_pytest, 'testId')
new_pytest = load_weights(args.new_pytest, 'testId')
# Calculate changes
cypress_changes = calculate_changes(old_cypress, new_cypress)
pytest_changes = calculate_changes(old_pytest, new_pytest)
# Check if changes exceed threshold
max_change = max(abs(cypress_changes['total_change_pct']), abs(pytest_changes['total_change_pct']))
print(f"Cypress total change: {cypress_changes['total_change_pct']:+.2f}%")
print(f"Pytest total change: {pytest_changes['total_change_pct']:+.2f}%")
print(f"Max change: {max_change:.2f}%")
print(f"Threshold: {args.threshold}%")
if max_change < args.threshold:
print(f"\n✓ Changes below threshold ({args.threshold}%). No PR needed.")
with open(args.output, 'w') as f:
f.write("")
sys.exit(0)
print(f"\n✓ Changes exceed threshold. Generating PR body...")
# Generate PR body
pr_body = generate_pr_body(cypress_changes, pytest_changes)
# Write to output file
with open(args.output, 'w') as f:
f.write(pr_body)
print(f"✓ PR body written to {args.output}")
print(f"✓ Significant changes: Cypress={len(cypress_changes['significant_changes'])}, Pytest={len(pytest_changes['significant_changes'])}")
sys.exit(0)


if __name__ == "__main__":
main()