#!/usr/bin/env python3
"""
Generate test weight files from historical CI test results.

This script parses JUnit XML files from multiple CI runs, calculates median
test durations, and generates JSON weight files for both Cypress and Pytest
tests.
"""

import argparse
import json
import statistics
import sys
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Dict, List, Optional

# Leading path component removed from Cypress spec paths.
_CYPRESS_E2E_PREFIX = "cypress/e2e/"

# Horizontal rule used to separate the sections of the console report.
_RULE = "=" * 60


def _strip_cypress_prefix(file_path: str) -> str:
    """Return *file_path* without a leading ``cypress/e2e/``.

    Only the leading occurrence is removed; the same text appearing later in
    the path is left untouched.
    """
    if file_path.startswith(_CYPRESS_E2E_PREFIX):
        return file_path[len(_CYPRESS_E2E_PREFIX):]
    return file_path


def _parse_positive_duration(time_str: str, xml_file: Path) -> Optional[float]:
    """Convert a JUnit ``time`` attribute to a float.

    Returns ``None`` when the value is not a number (a warning naming
    *xml_file* is printed) or when it is not strictly greater than zero.
    """
    try:
        duration = float(time_str)
    except ValueError:
        print(f"Warning: Invalid duration '{time_str}' in {xml_file}")
        return None
    # Zero, negative and NaN values all fail this comparison and are dropped.
    return duration if duration > 0 else None


def _print_section(title: str, leading_blank: bool = True) -> None:
    """Print *title* framed by two rules, optionally preceded by a blank line."""
    print(("\n" if leading_blank else "") + _RULE)
    print(title)
    print(_RULE)


def parse_cypress_results(artifact_dir: Path) -> Dict[str, List[float]]:
    """
    Parse Cypress JUnit XML files from multiple runs.

    Every ``cypress-test-*.xml`` file below ``artifact_dir`` is read. The spec
    path comes from the first ``<testsuite>`` carrying a ``file`` attribute;
    the duration comes from the first ``<testsuite>`` without a ``file``
    attribute whose ``time`` is greater than zero. Files that cannot be
    parsed are reported with a warning and skipped.

    Args:
        artifact_dir: Root directory containing run-* subdirectories
            (searched recursively)

    Returns:
        Dictionary mapping test file paths to lists of durations across runs
        Example: {"glossaryV2/v2_glossary_navigation.js": [94.8, 95.2, 94.5]}
    """
    test_durations: Dict[str, List[float]] = {}

    xml_files = list(artifact_dir.rglob("cypress-test-*.xml"))
    print(f"Found {len(xml_files)} Cypress XML files")

    for xml_file in xml_files:
        try:
            root = ET.parse(xml_file).getroot()

            # The suite with a ``file`` attribute identifies the spec file.
            root_suite = root.find(".//testsuite[@file]")
            if root_suite is None:
                continue
            relative_path = _strip_cypress_prefix(root_suite.get("file"))

            for testsuite in root.findall(".//testsuite"):
                # Suites that carry a ``file`` attribute are not timed here.
                if testsuite.get("file"):
                    continue
                duration = _parse_positive_duration(
                    testsuite.get("time", "0"), xml_file
                )
                if duration is None:
                    continue
                test_durations.setdefault(relative_path, []).append(duration)
                # Only take the first non-zero duration per file.
                break
        except ET.ParseError as e:
            print(f"Warning: Failed to parse {xml_file}: {e}")
        except Exception as e:
            # Best effort: one unreadable artifact must not abort the run.
            print(f"Warning: Error processing {xml_file}: {e}")

    return test_durations


def parse_pytest_results(artifact_dir: Path) -> Dict[str, List[float]]:
    """
    Parse Pytest JUnit XML files from multiple runs.

    Every ``junit*.xml`` file below ``artifact_dir`` whose file name does not
    contain ``cypress`` is read, and each ``<testcase>`` with a ``time``
    greater than zero contributes one duration. Test cases without a ``name``
    are ignored. Files that cannot be parsed are reported with a warning and
    skipped.

    Args:
        artifact_dir: Root directory containing run-* subdirectories
            (searched recursively)

    Returns:
        Dictionary mapping test IDs to lists of durations across runs
        Example: {"test_e2e::test_gms_get_dataset": [262.8, 265.3, 260.1]}
    """
    test_durations: Dict[str, List[float]] = {}

    # Exclude Cypress JUnit files by file name.
    xml_files = [
        xml_file
        for xml_file in artifact_dir.rglob("junit*.xml")
        if "cypress" not in xml_file.name
    ]
    print(f"Found {len(xml_files)} Pytest XML files")

    for xml_file in xml_files:
        try:
            root = ET.parse(xml_file).getroot()

            for testcase in root.findall(".//testcase"):
                classname = testcase.get("classname", "")
                name = testcase.get("name", "")
                if not name:
                    continue
                # Test ID is "classname::name" when a classname is present.
                test_id = f"{classname}::{name}" if classname else name

                duration = _parse_positive_duration(
                    testcase.get("time", "0"), xml_file
                )
                if duration is not None:
                    test_durations.setdefault(test_id, []).append(duration)
        except ET.ParseError as e:
            print(f"Warning: Failed to parse {xml_file}: {e}")
        except Exception as e:
            # Best effort: one unreadable artifact must not abort the run.
            print(f"Warning: Error processing {xml_file}: {e}")

    return test_durations


def calculate_median_weights(
    test_durations: Dict[str, List[float]],
    key_name: str = "filePath"
) -> List[Dict]:
    """
    Calculate median duration for each test.

    Entries with an empty duration list are omitted.

    Args:
        test_durations: Dictionary mapping test IDs to duration lists
        key_name: Key name to use in output ("filePath" or "testId")

    Returns:
        List of dictionaries with test IDs and median durations, sorted by
        duration in descending order
        Example: [{"filePath": "test1", "duration": "10.000s"}, ...]
    """
    ranked = []
    for test_id, durations in test_durations.items():
        if not durations:
            continue
        formatted = f"{statistics.median(durations):.3f}"
        # Rank on the rounded value that is actually emitted, so entries that
        # print identically keep their input order (the sort is stable).
        ranked.append((float(formatted), {key_name: test_id, "duration": f"{formatted}s"}))

    ranked.sort(key=lambda pair: pair[0], reverse=True)
    return [entry for _, entry in ranked]


def main() -> None:
    """Parse CLI arguments, compute both weight lists and write them as JSON.

    Exits with status 1 when ``--input-dir`` is not an existing directory.
    """
    parser = argparse.ArgumentParser(
        description="Generate test weight files from CI test results"
    )
    parser.add_argument(
        "--input-dir",
        type=Path,
        required=True,
        help="Directory containing test artifacts (organized by run ID)"
    )
    parser.add_argument(
        "--cypress-output",
        type=Path,
        required=True,
        help="Output path for Cypress test weights JSON"
    )
    parser.add_argument(
        "--pytest-output",
        type=Path,
        required=True,
        help="Output path for Pytest test weights JSON"
    )
    args = parser.parse_args()

    # A regular file would make both searches come back empty and silently
    # produce empty weight files, so require a real directory.
    if not args.input_dir.is_dir():
        print(
            f"Error: Input directory does not exist or is not a directory: {args.input_dir}",
            file=sys.stderr,
        )
        sys.exit(1)

    _print_section("Parsing Cypress test results...", leading_blank=False)
    cypress_durations = parse_cypress_results(args.input_dir)
    print(f"Found {len(cypress_durations)} unique Cypress tests")

    _print_section("Parsing Pytest test results...")
    pytest_durations = parse_pytest_results(args.input_dir)
    print(f"Found {len(pytest_durations)} unique Pytest tests")

    _print_section("Calculating median weights...")
    cypress_weights = calculate_median_weights(cypress_durations, key_name="filePath")
    pytest_weights = calculate_median_weights(pytest_durations, key_name="testId")
    print(f"Generated {len(cypress_weights)} Cypress weights")
    print(f"Generated {len(pytest_weights)} Pytest weights")

    # Create output directories if they don't exist
    args.cypress_output.parent.mkdir(parents=True, exist_ok=True)
    args.pytest_output.parent.mkdir(parents=True, exist_ok=True)

    _print_section("Writing output files...")
    # Explicit encoding keeps the output independent of the platform locale.
    with open(args.cypress_output, "w", encoding="utf-8") as f:
        json.dump(cypress_weights, f, indent=2)
    print(f"Wrote Cypress weights to: {args.cypress_output}")

    with open(args.pytest_output, "w", encoding="utf-8") as f:
        json.dump(pytest_weights, f, indent=2)
    print(f"Wrote Pytest weights to: {args.pytest_output}")

    # Print top 5 longest tests for each type
    if cypress_weights:
        _print_section("Top 5 longest Cypress tests:")
        for i, test in enumerate(cypress_weights[:5], 1):
            print(f"{i}. {test['filePath']}: {test['duration']}")

    if pytest_weights:
        _print_section("Top 5 longest Pytest tests:")
        for i, test in enumerate(pytest_weights[:5], 1):
            print(f"{i}. {test['testId']}: {test['duration']}")

    _print_section("Done!")


if __name__ == "__main__":
    main()