"""
|
|
Deploy Ingestion Pipelines Script
|
|
|
|
This script uses the OpenMetadata client to:
|
|
1. Paginate over ingestion pipelines in groups of 20
|
|
2. Fetch the IDs of those IngestionPipelines
|
|
3. Send bulk deploy requests to api/v1/services/ingestionPipelines/bulk/deploy
|
|
4. Track responses to monitor deployment success/failure
|
|
"""

import argparse
import json
import logging
import sys
from typing import Dict, List

from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
    OpenMetadataConnection,
)
from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import (
    IngestionPipeline,
)
from metadata.generated.schema.entity.services.ingestionPipelines.pipelineServiceClientResponse import (
    PipelineServiceClientResponse,
)
from metadata.generated.schema.security.client.openMetadataJWTClientConfig import (
    OpenMetadataJWTClientConfig,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.ometa.utils import model_str

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)


class PipelineDeployer:
    """Class to handle bulk deployment of ingestion pipelines"""

    def __init__(self, server_url: str, jwt_token: str):
        """
        Initialize the PipelineDeployer with an OpenMetadata connection

        Args:
            server_url: OpenMetadata server URL
            jwt_token: JWT token for authentication
        """
        # Remove trailing slash if present
        self.server_url = server_url.rstrip('/')

        # Configure OpenMetadata connection
        server_config = OpenMetadataConnection(
            hostPort=self.server_url,
            authProvider="openmetadata",
            securityConfig=OpenMetadataJWTClientConfig(jwtToken=jwt_token),
        )

        self.metadata = OpenMetadata(server_config)
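        # Optional sanity check (not part of the original flow): the ometa
        # client exposes health_check() to verify connectivity and auth
        # before listing pipelines.
        # assert self.metadata.health_check()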
        logger.info(f"Connected to OpenMetadata server: {server_url}")

    def bulk_deploy_pipelines(self, pipeline_ids: List[str]) -> List[PipelineServiceClientResponse]:
        """
        Send a bulk deploy request to the OpenMetadata API

        Args:
            pipeline_ids: List of pipeline UUIDs (as strings) to deploy

        Returns:
            List of PipelineServiceClientResponse objects
        """
        if not pipeline_ids:
            logger.warning("No pipeline IDs provided for deployment")
            return []

        logger.info(f"Deploying {len(pipeline_ids)} pipelines...")

        try:
            # Make a POST request to the bulk deploy endpoint
            response = self.metadata.client.post(
                "/services/ingestionPipelines/bulk/deploy",
                data=json.dumps(pipeline_ids),
            )
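
            # The endpoint is expected to return a JSON array with one entry
            # per pipeline, each parseable as PipelineServiceClientResponse.
            # Illustrative shape (not captured from a live server):
            #   [{"code": 200, "platform": "Airflow", "version": "1.5.0", "reason": null}]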
            if response:
                # Parse response data into Pydantic models
                parsed_responses = [
                    PipelineServiceClientResponse.model_validate(item)
                    for item in response
                ]
                logger.info(f"Bulk deploy request completed with {len(parsed_responses)} responses")
                return parsed_responses
            else:
                logger.error("No response from bulk deploy API")
                return []

        except Exception as e:
            logger.error(f"Error during bulk deployment: {e}")
            raise

    def analyze_deployment_results(
        self,
        responses: List[PipelineServiceClientResponse],
        pipeline_ids: List[str],
    ) -> Dict[str, int]:
        """
        Analyze deployment responses to track success/failure

        Args:
            responses: List of PipelineServiceClientResponse objects
            pipeline_ids: List of pipeline IDs that were deployed (for correlation)

        Returns:
            Dictionary with deployment statistics
        """
        stats = {
            "total": len(responses),
            "success": 0,
            "failed": 0,
            "unknown": 0
        }

        success_codes = {200, 201}  # HTTP success codes

        # Correlate responses with pipeline IDs (assuming same order)
        for i, response in enumerate(responses):
            pipeline_id = pipeline_ids[i] if i < len(pipeline_ids) else "unknown"
            code = response.code
            reason = response.reason or ""
            platform = response.platform
            version = response.version or "unknown"

            if code in success_codes:
                stats["success"] += 1
                logger.info(f"✓ Pipeline {pipeline_id} deployed successfully on {platform} v{version} (code: {code})")
            elif code:
                stats["failed"] += 1
                logger.warning(f"✗ Pipeline {pipeline_id} deployment failed on {platform} v{version} (code: {code}): {reason}")
            else:
                stats["unknown"] += 1
                logger.warning(f"? Pipeline {pipeline_id} unknown deployment status on {platform}: {response.model_dump()}")

        return stats

    def deploy_all_pipelines(self, batch_size: int = 20) -> Dict[str, int]:
        """
        Main method to deploy all ingestion pipelines

        Args:
            batch_size: Number of pipeline IDs sent per bulk deploy request

        Returns:
            Dictionary with overall deployment statistics
        """
        try:
            # Fetch all pipelines (list_all_entities paginates internally)
            pipelines = list(self.metadata.list_all_entities(entity=IngestionPipeline, skip_on_failure=True))

            if not pipelines:
                logger.warning("No pipelines found to deploy")
                return {"total": 0, "success": 0, "failed": 0, "unknown": 0}

            logger.info("Found %d pipelines to deploy", len(pipelines))

            # Extract pipeline IDs
            pipeline_ids = [model_str(pipeline.id) for pipeline in pipelines]

            if not pipeline_ids:
                logger.error("No valid pipeline IDs found")
                return {"total": 0, "success": 0, "failed": 0, "unknown": 0}

            # Deploy pipelines in batches (ceiling division for the batch count)
            all_responses = []
            total_batches = (len(pipeline_ids) + batch_size - 1) // batch_size

            for i in range(0, len(pipeline_ids), batch_size):
                batch = pipeline_ids[i:i + batch_size]
                batch_num = i // batch_size + 1
                remaining_batches = total_batches - batch_num

                logger.info(f"Deploying batch {batch_num}/{total_batches} with {len(batch)} pipelines... ({remaining_batches} batches remaining)")

                batch_responses = self.bulk_deploy_pipelines(batch)
                all_responses.extend(batch_responses)

                # Log parsed responses for this batch
                logger.info(f"Batch {batch_num} responses:")
                for j, response in enumerate(batch_responses):
                    pipeline_id = batch[j] if j < len(batch) else "unknown"
                    logger.info(f"  Pipeline {pipeline_id}: code={response.code}, platform={response.platform}, reason={response.reason or 'N/A'}")

                logger.info(f"Batch {batch_num} completed. Progress: {batch_num}/{total_batches} batches processed")

            # Analyze results, correlating responses with the pipeline IDs we sent
            stats = self.analyze_deployment_results(all_responses, pipeline_ids)

            logger.info("=== Deployment Summary ===")
            logger.info(f"Total pipelines: {stats['total']}")
            logger.info(f"Successfully deployed: {stats['success']}")
            logger.info(f"Failed deployments: {stats['failed']}")
            logger.info(f"Unknown status: {stats['unknown']}")

            return stats

        except Exception as e:
            logger.error(f"Deployment process failed: {e}")
            raise


def main():
    """Main function to run the pipeline deployment script"""
    parser = argparse.ArgumentParser(
        description="Deploy all ingestion pipelines using the OpenMetadata bulk deploy API"
    )
    parser.add_argument(
        "--server-url",
        required=True,
        help="OpenMetadata server URL (e.g., http://localhost:8585/api/)"
    )
    parser.add_argument(
        "--jwt-token",
        required=True,
        help="JWT token for authentication"
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=20,
        help="Number of pipelines per bulk deploy request (default: 20)"
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Enable verbose logging"
    )

    args = parser.parse_args()
    logging.getLogger().setLevel(logging.INFO)

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    try:
        deployer = PipelineDeployer(args.server_url, args.jwt_token)
        stats = deployer.deploy_all_pipelines(batch_size=args.batch_size)

        # Exit with an error code if any deployments failed
        if stats["failed"] > 0:
            sys.exit(1)
        else:
            sys.exit(0)

    except Exception as e:
        logger.error(f"Script failed: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()