diff --git a/docs/cli-commands/graphql.md b/docs/cli-commands/graphql.md new file mode 100644 index 0000000000..65785cde06 --- /dev/null +++ b/docs/cli-commands/graphql.md @@ -0,0 +1,502 @@ +# DataHub GraphQL CLI + +The `datahub graphql` command provides a powerful interface to interact with DataHub's GraphQL API directly from the command line. This enables you to query metadata, perform mutations, and explore the GraphQL schema without writing custom applications. + +## Quick Start + +```shell +# Get current user info +datahub graphql --operation me + +# Search for datasets +datahub graphql --operation searchAcrossEntities --variables '{"input": {"query": "users", "types": ["DATASET"]}}' + +# Execute raw GraphQL +datahub graphql --query "query { me { username } }" +``` + +## Core Features + +### 1. Schema Discovery + +Discover available operations and understand their structure: + +```shell +# List all available operations +datahub graphql --list-operations + +# List only queries or mutations +datahub graphql --list-queries +datahub graphql --list-mutations +``` + +### 2. Smart Description + +The `--describe` command intelligently searches for both operations and types: + +```shell +# Describe an operation +datahub graphql --describe searchAcrossEntities + +# Describe a GraphQL type +datahub graphql --describe SearchInput + +# Describe enum types to see allowed values +datahub graphql --describe FilterOperator +``` + +**When both operation and type exist with same name:** + +```shell +datahub graphql --describe someConflictingName +# Output: +# === OPERATION === +# Operation: someConflictingName +# Type: Query +# ... +# +# === TYPE === +# Type: someConflictingName +# Kind: INPUT_OBJECT +# ... +``` + +### 3. Recursive Type Exploration + +Use `--recurse` with `--describe` to explore all nested types: + +```shell +# Explore operation with all its input types +datahub graphql --describe searchAcrossEntities --recurse + +# Explore type with all nested dependencies +datahub graphql --describe SearchInput --recurse +``` + +**Example recursive output:** + +``` +Operation: searchAcrossEntities +Type: Query +Description: Search across all entity types +Arguments: + - input: SearchInput! + +Input Type Details: + +SearchInput: + query: String + types: [EntityType!] + filters: SearchFilter + +SearchFilter: + criteria: [FacetFilterInput!] + +FacetFilterInput: + field: String! - Name of field to filter by + values: [String!]! - Values, one of which the intended field should match + condition: FilterOperator - Condition for the values + +FilterOperator: + EQUAL - Represents the relation: field = value + GREATER_THAN - Represents the relation: field > value + LESS_THAN - Represents the relation: field < value +``` + +### 4. Operation Execution + +Execute operations by name without writing full GraphQL: + +```shell +# Execute operation by name +datahub graphql --operation me + +# Execute with variables +datahub graphql --operation searchAcrossEntities --variables '{"input": {"query": "datasets", "types": ["DATASET"]}}' + +# Execute with variables from file +datahub graphql --operation createGroup --variables ./group-data.json +``` + +### 5. Raw GraphQL Execution + +Execute any custom GraphQL query or mutation: + +```shell +# Simple query +datahub graphql --query "query { me { username } }" + +# Query with variables +datahub graphql --query "query GetUser($urn: String!) 
{ corpUser(urn: $urn) { info { email } } }" --variables '{"urn": "urn:li:corpuser:john"}' + +# Query from file +datahub graphql --query ./complex-query.graphql --variables ./variables.json + +# Mutation +datahub graphql --query "mutation { addTag(input: {resourceUrn: \"urn:li:dataset:...\", tagUrn: \"urn:li:tag:Important\"}) }" +``` + +### 6. File Support + +Both queries and variables can be loaded from files: + +```shell +# Load query from file +datahub graphql --query ./queries/search-datasets.graphql + +# Load variables from file +datahub graphql --operation searchAcrossEntities --variables ./variables/search-params.json + +# Both from files +datahub graphql --query ./query.graphql --variables ./vars.json +``` + +### 7. LLM-Friendly JSON Output + +Use `--format json` to get structured JSON output perfect for LLM consumption: + +```shell +# Get operations as JSON for LLM processing +datahub graphql --list-operations --format json + +# Describe operation with complete type information +datahub graphql --describe searchAcrossEntities --recurse --format json + +# Get type details in structured format +datahub graphql --describe SearchInput --format json +``` + +**Example JSON output for `--list-operations --format json`:** + +```json +{ + "schema": { + "queries": [ + { + "name": "me", + "type": "Query", + "description": "Get current user information", + "arguments": [] + }, + { + "name": "searchAcrossEntities", + "type": "Query", + "description": "Search across all entity types", + "arguments": [ + { + "name": "input", + "type": { + "kind": "NON_NULL", + "ofType": { + "name": "SearchInput", + "kind": "INPUT_OBJECT" + } + }, + "required": true, + "description": "Search input parameters" + } + ] + } + ], + "mutations": [...] + } +} +``` + +**Example JSON output for `--describe searchAcrossEntities --recurse --format json`:** + +```json +{ + "operation": { + "name": "searchAcrossEntities", + "type": "Query", + "description": "Search across all entity types", + "arguments": [...] + }, + "relatedTypes": { + "SearchInput": { + "name": "SearchInput", + "kind": "INPUT_OBJECT", + "fields": [ + { + "name": "query", + "type": {"name": "String", "kind": "SCALAR"}, + "description": "Search query string" + }, + { + "name": "filters", + "type": {"name": "SearchFilter", "kind": "INPUT_OBJECT"}, + "description": "Optional filters" + } + ] + }, + "SearchFilter": {...}, + "FilterOperator": { + "name": "FilterOperator", + "kind": "ENUM", + "values": [ + { + "name": "EQUAL", + "description": "Represents the relation: field = value", + "deprecated": false + } + ] + } + }, + "meta": { + "query": "searchAcrossEntities", + "recursive": true + } +} +``` + +### 8. 
Custom Schema Path
+
+When introspection is disabled or for local development:
+
+```shell
+# Use local GraphQL schema files
+datahub graphql --list-operations --schema-path ./local-schemas/
+
+# Describe with custom schema
+datahub graphql --describe searchAcrossEntities --schema-path ./graphql-schemas/
+
+# Get JSON format with custom schema
+datahub graphql --list-operations --schema-path ./schemas/ --format json
+```
+
+## Command Reference
+
+### Global Options
+
+| Option              | Type   | Description                                                     |
+| ------------------- | ------ | --------------------------------------------------------------- |
+| `--query`           | string | GraphQL query/mutation string or path to .graphql file          |
+| `--variables`       | string | Variables as JSON string or path to .json file                  |
+| `--operation`       | string | Execute named operation from DataHub's schema                   |
+| `--describe`        | string | Describe operation or type (searches both)                      |
+| `--recurse`         | flag   | Recursively explore nested types with --describe                |
+| `--list-operations` | flag   | List all available operations                                   |
+| `--list-queries`    | flag   | List available query operations                                 |
+| `--list-mutations`  | flag   | List available mutation operations                              |
+| `--schema-path`     | string | Path to GraphQL schema files directory                          |
+| `--no-pretty`       | flag   | Disable pretty-printing of JSON output (default: pretty-print)  |
+| `--format`          | choice | Output format: `human` (default) or `json` for LLM consumption  |
+
+### Usage Patterns
+
+```shell
+# Discovery
+datahub graphql --list-operations
+datahub graphql --describe <name> [--recurse]
+
+# Execution
+datahub graphql --operation <name> [--variables <json-or-file>]
+datahub graphql --query <query-or-file> [--variables <json-or-file>]
+```
+
+## Advanced Examples
+
+### Complex Search with Filters
+
+```shell
+datahub graphql --operation searchAcrossEntities --variables '{
+  "input": {
+    "query": "customer",
+    "types": ["DATASET", "DASHBOARD"],
+    "filters": [{
+      "field": "platform",
+      "values": ["mysql", "postgres"]
+    }],
+    "start": 0,
+    "count": 20
+  }
+}'
+```
+
+### Adding a Tag to a Dataset
+
+```shell
+# Add the Important tag to a dataset
+datahub graphql --query 'mutation AddTag($input: TagAssociationInput!) {
+  addTag(input: $input)
+}' --variables '{
+  "input": {
+    "resourceUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,db.users,PROD)",
+    "tagUrn": "urn:li:tag:Important"
+  }
+}'
+```
+
+### Batch User Queries
+
+```shell
+# Get multiple users using raw GraphQL
+datahub graphql --query 'query GetUsers($urns: [String!]!) {
+  users: batchGet(urns: $urns) {
+    ... on CorpUser {
+      urn
+      username
+      properties {
+        email
+        displayName
+      }
+    }
+  }
+}' --variables '{"urns": ["urn:li:corpuser:alice", "urn:li:corpuser:bob"]}'
+```
+
+## Schema Introspection
+
+DataHub's GraphQL CLI provides two modes for schema discovery:
+
+### Schema Discovery Modes
+
+1. **Live Introspection** (default): Queries the live GraphQL endpoint when no `--schema-path` is provided
+2. **Local Schema Files**: Uses `.graphql` files from the specified directory when `--schema-path` is provided
+
+**Note:** These modes are mutually exclusive; there is no fallback between them. If introspection fails, or if the local schema files are invalid, the command fails with an error.
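+
+Because the CLI never falls back between these modes on its own, a wrapper script can implement that policy explicitly. Below is a minimal Python sketch, assuming a hypothetical local schema copy in `./schemas/`:
+
+```python
+import json
+import subprocess
+
+
+def list_operations(schema_dir: str = "./schemas/") -> dict:
+    """List operations via live introspection, retrying with local schema files."""
+    cmd = ["datahub", "graphql", "--list-operations", "--format", "json"]
+    result = subprocess.run(cmd, capture_output=True, text=True)
+    if result.returncode != 0:
+        # The CLI fails hard when introspection is unavailable,
+        # so retry explicitly against the local schema files.
+        result = subprocess.run(
+            cmd + ["--schema-path", schema_dir],
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+    return json.loads(result.stdout)
+```
+
+The same pattern works for `--describe` and the other discovery flags.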
+ +### Schema File Structure + +When using `--schema-path`, the directory should contain `.graphql` files with: + +```graphql +# queries.graphql +extend type Query { + me: AuthenticatedUser + searchAcrossEntities(input: SearchInput!): SearchResults +} + +# mutations.graphql +extend type Mutation { + addTag(input: TagAssociationInput!): String + deleteEntity(urn: String!): String +} +``` + +## Error Handling + +The CLI provides clear error messages for common issues: + +```shell +# Operation not found +datahub graphql --describe nonExistentOp +# Error: 'nonExistentOp' not found as an operation or type. Use --list-operations to see available operations or try a specific type name. + +# Missing required arguments +datahub graphql --operation searchAcrossEntities +# Error: Operation 'searchAcrossEntities' requires arguments: input. Provide them using --variables '{"input": "value", ...}' + +# Invalid JSON variables +datahub graphql --operation me --variables '{invalid json}' +# Error: Invalid JSON in variables: Expecting property name enclosed in double quotes +``` + +## Output Formats + +### Pretty Printing (Default) + +```json +{ + "me": { + "corpUser": { + "urn": "urn:li:corpuser:datahub", + "username": "datahub" + } + } +} +``` + +### Compact Output + +```shell +datahub graphql --operation me --no-pretty +{"me":{"corpUser":{"urn":"urn:li:corpuser:datahub","username":"datahub"}}} +``` + +## Integration Examples + +### Shell Scripts + +```bash +#!/bin/bash +# Get all datasets for a platform +PLATFORM="mysql" +RESULTS=$(datahub graphql --operation searchAcrossEntities --variables "{ + \"input\": { + \"query\": \"*\", + \"types\": [\"DATASET\"], + \"filters\": [{\"field\": \"platform\", \"values\": [\"$PLATFORM\"]}] + } +}" --no-pretty) + +echo "Found $(echo "$RESULTS" | jq '.searchAcrossEntities.total') datasets" +``` + +### CI/CD Pipelines + +```yaml +# GitHub Actions example +- name: Tag Important Datasets + run: | + datahub graphql --operation addTag --variables '{ + "input": { + "resourceUrn": "${{ env.DATASET_URN }}", + "tagUrn": "urn:li:tag:Production" + } + }' +``` + +## LLM Integration + +The `--format json` option makes the CLI perfect for LLM integration: + +### Benefits for AI Assistants + +1. **Schema Understanding**: LLMs can parse the complete GraphQL schema structure +2. **Query Generation**: AI can generate accurate GraphQL queries based on available operations +3. **Type Validation**: LLMs understand required vs optional arguments and their types +4. **Documentation**: Rich descriptions and examples help AI provide better user assistance + +### Use Cases + +```shell +# AI assistant gets complete schema knowledge +datahub graphql --list-operations --format json | ai-assistant process-schema + +# Generate queries for user requests +datahub graphql --describe searchAcrossEntities --recurse --format json | ai-helper generate-query --user-intent "find mysql tables" + +# Validate user input against schema +datahub graphql --describe createGroup --format json | validate-user-input +``` + +### JSON Schema Benefits + +- **Structured data**: No parsing of human-readable text required +- **Complete type information**: Includes GraphQL type wrappers (NON_NULL, LIST) +- **Rich metadata**: Descriptions, deprecation info, argument requirements +- **Consistent format**: Predictable structure across all operations and types +- **Recursive exploration**: Complete dependency graphs for complex types + +## Tips and Best Practices + +1. 
**Start with Discovery**: Use `--list-operations` and `--describe` to understand available operations +2. **Use --recurse**: When learning about complex operations, `--describe --recurse` shows the complete type structure +3. **LLM Integration**: Use `--format json` when building AI assistants or automation tools +4. **File-based Variables**: For complex variables, use JSON files instead of inline JSON +5. **Error Handling**: The CLI provides detailed error messages - read them carefully for debugging +6. **Schema Evolution**: Operations and types can change between DataHub versions - use discovery commands to stay current + +## Troubleshooting + +### Common Issues + +**"Introspection not available"**: Use `--schema-path` to point to local GraphQL schema files + +**"Operation not found"**: Check spelling and use `--list-operations` to see available operations + +**"Type not found"**: Verify type name casing (GraphQL types are case-sensitive) + +**Environment issues**: Ensure DataHub server is running and accessible at the configured endpoint diff --git a/docs/cli.md b/docs/cli.md index ba6ebe4e0b..2c2091caa6 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -67,6 +67,7 @@ Commands: exists A group of commands to check existence of entities in DataHub. forms A group of commands to interact with forms in DataHub. get A group of commands to get metadata from DataHub. + graphql Execute GraphQL queries and mutations against DataHub. group A group of commands to interact with the Group entity in DataHub. ingest Ingest metadata into DataHub. init Configure which datahub instance to connect to @@ -495,6 +496,44 @@ $ datahub get --urn "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset, } ``` +### graphql + +The `graphql` command allows you to execute GraphQL queries and mutations against DataHub's GraphQL API. This provides full access to DataHub's metadata through its native GraphQL interface. + +```shell +# Execute a GraphQL query +datahub graphql --query "query { me { username } }" + +# Use named operations from DataHub's schema +datahub graphql --operation searchAcrossEntities --variables '{"input": {"query": "users"}}' + +# List available operations +datahub graphql --list-operations + +# Get help for a specific operation +datahub graphql --describe searchAcrossEntities + +# Explore types recursively +datahub graphql --describe SearchInput --recurse + +# Load queries and variables from files +datahub graphql --query ./search-tags.graphql --variables ./search-params.json + +# Get JSON output for LLM integration +datahub graphql --list-operations --format json +``` + +The GraphQL command supports both raw GraphQL queries/mutations and operation-based execution using DataHub's introspected schema. It automatically detects whether `--query` and `--variables` arguments are file paths or literal content, enabling seamless use of both inline GraphQL and file-based queries. + +Key features: + +- **Schema discovery**: List and describe all available operations and types +- **File support**: Load queries and variables from `.graphql` and `.json` files +- **LLM-friendly output**: JSON format with complete type information +- **Recursive exploration**: Deep-dive into complex GraphQL types + +➡️ [Learn more about the GraphQL command](./cli-commands/graphql.md) + ### put The `put` group of commands allows you to write metadata into DataHub. This is a flexible way for you to issue edits to metadata from the command line. 
diff --git a/metadata-ingestion/src/datahub/cli/graphql_cli.py b/metadata-ingestion/src/datahub/cli/graphql_cli.py new file mode 100644 index 0000000000..579fac8436 --- /dev/null +++ b/metadata-ingestion/src/datahub/cli/graphql_cli.py @@ -0,0 +1,1422 @@ +import json +import logging +import re +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +import click + +from datahub.ingestion.graph.client import get_default_graph +from datahub.ingestion.graph.config import ClientMode +from datahub.upgrade import upgrade + +logger = logging.getLogger(__name__) + +# GraphQL introspection queries (split to avoid "bad faith" protection) +QUERY_INTROSPECTION = """ +query QueryIntrospection { + __schema { + queryType { + name + fields { + name + description + args { + name + type { + name + kind + ofType { + name + kind + ofType { + name + kind + } + } + } + } + } + } + } +} +""" + +MUTATION_INTROSPECTION = """ +query MutationIntrospection { + __schema { + mutationType { + name + fields { + name + description + args { + name + type { + name + kind + ofType { + name + kind + ofType { + name + kind + } + } + } + } + } + } + } +} +""" + + +def _is_file_path(value: str) -> bool: + """Check if a string appears to be a file path and the file exists.""" + if not value or len(value) < 2: + return False + + resolved_path = Path(value).resolve() + return resolved_path.exists() + + +def _load_content_or_file(value: str) -> str: + """Load content from file if value is a file path, otherwise return value as-is.""" + if _is_file_path(value): + resolved_path = Path(value).resolve() + + # Security check: prevent path traversal attacks + if "../" in str(resolved_path) or "..\\" in str(resolved_path): + raise ValueError("Invalid file path: path traversal detected") + + with open(resolved_path, "r") as f: + return f.read() + return value + + +def _parse_variables(variables_str: Optional[str]) -> Optional[Dict[str, Any]]: + """Parse variables from JSON string or file.""" + if not variables_str: + return None + + content = _load_content_or_file(variables_str) + try: + return json.loads(content) + except json.JSONDecodeError as e: + raise click.ClickException(f"Invalid JSON in variables: {e}") from e + + +def _get_schema_files_path() -> Path: + """Get the path to DataHub's GraphQL schema files.""" + # Try to find the schema files relative to the current package + current_file = Path(__file__) + repo_root = current_file + + # Go up directories until we find the repo root (contains datahub-graphql-core) + for _ in range(10): # Safety limit + repo_root = repo_root.parent + graphql_core_path = ( + repo_root / "datahub-graphql-core" / "src" / "main" / "resources" + ) + if graphql_core_path.exists(): + return graphql_core_path + + # Fallback: try common relative paths + possible_paths = [ + Path("../../../datahub-graphql-core/src/main/resources"), + Path("../../../../datahub-graphql-core/src/main/resources"), + Path("datahub-graphql-core/src/main/resources"), + ] + + for path in possible_paths: + if path.exists(): + return path.resolve() + + raise FileNotFoundError("Could not find DataHub GraphQL schema files") + + +def _parse_graphql_operations_from_files( + custom_schema_path: Optional[str] = None, +) -> Dict[str, Any]: + """Parse operations from DataHub's GraphQL schema files as fallback.""" + try: + if custom_schema_path: + schema_path = Path(custom_schema_path) + if not schema_path.exists(): + raise FileNotFoundError( + f"Custom schema path does not exist: {custom_schema_path}" + ) + 
logger.debug(f"Using custom GraphQL schema path: {schema_path}") + else: + schema_path = _get_schema_files_path() + logger.debug(f"Found GraphQL schema files at: {schema_path}") + + queries = [] + mutations = [] + + # Parse all .graphql files in the directory + for graphql_file in schema_path.glob("*.graphql"): + content = graphql_file.read_text() + + # Extract queries using regex + query_matches = re.finditer( + r"extend\s+type\s+Query\s*\{([^}]+)\}|type\s+Query\s*\{([^}]+)\}", + content, + re.DOTALL | re.IGNORECASE, + ) + + for match in query_matches: + query_content = match.group(1) or match.group(2) + operations = _parse_operations_from_content(query_content, "Query") + queries.extend(operations) + + # Extract mutations using regex + mutation_matches = re.finditer( + r"extend\s+type\s+Mutation\s*\{([^}]+)\}|type\s+Mutation\s*\{([^}]+)\}", + content, + re.DOTALL | re.IGNORECASE, + ) + + for match in mutation_matches: + mutation_content = match.group(1) or match.group(2) + operations = _parse_operations_from_content( + mutation_content, "Mutation" + ) + mutations.extend(operations) + + logger.debug( + f"Parsed {len(queries)} queries and {len(mutations)} mutations from schema files" + ) + + return { + "queryType": {"fields": queries} if queries else None, + "mutationType": {"fields": mutations} if mutations else None, + } + + except Exception as e: + logger.error(f"Failed to parse GraphQL schema files: {e}") + logger.error("Cannot proceed without valid schema information.") + logger.error("Please ensure:") + logger.error("1. DataHub GMS is accessible for schema introspection") + logger.error("2. Schema files exist and are valid GraphQL") + logger.error("3. Network connectivity allows GraphQL requests") + raise click.ClickException( + f"Schema loading failed: {e}. Cannot determine available GraphQL operations." + ) from e + + +def _parse_operations_from_content( + content: str, operation_type: str +) -> List[Dict[str, Any]]: + """Parse individual operations from GraphQL content.""" + operations = [] + + # Match field definitions with optional descriptions + # Pattern matches: fieldName(args): ReturnType or "description" fieldName(args): ReturnType + field_pattern = ( + r'(?:"""([^"]+)"""\s*|"([^"]+)"\s*)?(\w+)(?:\([^)]*\))?\s*:\s*[^,\n]+' + ) + + matches = re.finditer(field_pattern, content, re.MULTILINE) + + for match in matches: + description1, description2, field_name = match.groups() + description = description1 or description2 or "" + + # Skip common GraphQL keywords and types + if field_name.lower() in [ + "query", + "mutation", + "subscription", + "type", + "input", + "enum", + ]: + continue + + operation: Dict[str, Any] = { + "name": field_name, + "description": description.strip(), + "args": [], # We could parse args too, but for now keep it simple + } + operations.append(operation) + + return operations + + +def _format_operation_list( + operations: List[Dict[str, Any]], operation_type: str +) -> str: + """Format operations list for display.""" + if not operations: + return f"No {operation_type.lower()} operations found." 
+ + lines = [f"{operation_type}:"] + for op in operations: + name = op.get("name", "Unknown") + description = op.get("description", "") + if description: + lines.append(f" - {name}: {description}") + else: + lines.append(f" - {name}") + + return "\n".join(lines) + + +def _find_input_type( + schema: Dict[str, Any], type_name: str +) -> Optional[Dict[str, Any]]: + """Find an input type definition in the schema.""" + types = schema.get("types", []) + for type_def in types: + if type_def.get("name") == type_name and type_def.get("kind") == "INPUT_OBJECT": + return type_def + return None + + +def _format_operation_details( + operation: Dict[str, Any], + operation_type: str, + schema: Optional[Dict[str, Any]] = None, +) -> str: + """Format detailed operation information.""" + name = operation.get("name", "Unknown") + description = operation.get("description", "No description available") + args = operation.get("args", []) + + lines = [ + f"Operation: {name}", + f"Type: {operation_type}", + f"Description: {description}", + ] + + if args: + lines.append("Arguments:") + for arg in args: + arg_name = arg.get("name", "unknown") + arg_type = _format_graphql_type(arg.get("type", {})) + lines.append(f" - {arg_name}: {arg_type}") + + # If we have schema info, try to show input type fields + if schema: + # Extract the base type name (remove ! and [] wrappers) + base_type_name = _extract_base_type_name(arg.get("type", {})) + if base_type_name: + input_type = _find_input_type(schema, base_type_name) + if input_type: + input_fields = input_type.get("inputFields", []) + if input_fields: + lines.append(f" Fields in {base_type_name}:") + for field in input_fields: + field_name = field.get("name", "unknown") + field_type = _format_graphql_type(field.get("type", {})) + field_desc = field.get("description", "") + if field_desc: + lines.append( + f" - {field_name}: {field_type} - {field_desc}" + ) + else: + lines.append(f" - {field_name}: {field_type}") + else: + lines.append("Arguments: None") + + return "\n".join(lines) + + +def _format_operation_details_recursive( + operation: Dict[str, Any], operation_type: str, client: Any +) -> str: + """Format detailed operation information with recursive type exploration.""" + name = operation.get("name", "Unknown") + description = operation.get("description", "No description available") + args = operation.get("args", []) + + lines = [ + f"Operation: {name}", + f"Type: {operation_type}", + f"Description: {description}", + ] + + if args: + lines.append("Arguments:") + + # Collect all input types for recursive exploration + all_types_to_explore = set() + + for arg in args: + arg_name = arg.get("name", "unknown") + arg_type = _format_graphql_type(arg.get("type", {})) + lines.append(f" - {arg_name}: {arg_type}") + + # Collect base type name for recursive exploration + base_type_name = _extract_base_type_name(arg.get("type", {})) + if base_type_name and base_type_name not in [ + "String", + "Int", + "Float", + "Boolean", + "ID", + ]: + all_types_to_explore.add(base_type_name) + + # Recursively explore all collected types + if all_types_to_explore: + lines.append("") # Empty line before type details + lines.append("Input Type Details:") + + all_explored_types = {} + for type_name in all_types_to_explore: + logger.debug(f"Recursively exploring input type: {type_name}") + try: + explored_types = _fetch_type_recursive(client, type_name) + all_explored_types.update(explored_types) + except Exception as e: + logger.debug(f"Failed to explore type {type_name}: {e}") + + # Format all 
explored types + if all_explored_types: + lines.append("") + for type_name in sorted(all_explored_types.keys()): + type_info = all_explored_types[type_name] + lines.append(f"{type_name}:") + lines.extend(_format_single_type_fields(type_info)) + lines.append("") # Empty line between types + else: + lines.append("Arguments: None") + + return "\n".join(lines).rstrip() + + +def _format_type_details(input_type: Dict[str, Any]) -> str: + """Format detailed input type information.""" + type_name = input_type.get("name", "Unknown") + type_kind = input_type.get("kind", "") + input_fields = input_type.get("inputFields", []) + enum_values = input_type.get("enumValues", []) + + lines = [ + f"Type: {type_name}", + f"Kind: {type_kind}", + ] + + if input_fields: + lines.append("Fields:") + for field in input_fields: + field_name = field.get("name", "unknown") + field_type = _format_graphql_type(field.get("type", {})) + field_desc = field.get("description", "") + if field_desc: + lines.append(f" - {field_name}: {field_type} - {field_desc}") + else: + lines.append(f" - {field_name}: {field_type}") + elif enum_values: + lines.append("Enum Values:") + for enum_value in enum_values: + value_name = enum_value.get("name", "unknown") + value_desc = enum_value.get("description", "") + is_deprecated = enum_value.get("isDeprecated", False) + deprecation_reason = enum_value.get("deprecationReason", "") + + value_line = f" - {value_name}" + if value_desc: + value_line += f" - {value_desc}" + if is_deprecated: + if deprecation_reason: + value_line += f" (DEPRECATED: {deprecation_reason})" + else: + value_line += " (DEPRECATED)" + lines.append(value_line) + else: + if type_kind == "ENUM": + lines.append("Enum Values: None") + else: + lines.append("Fields: None") + + return "\n".join(lines) + + +def _collect_nested_types( + type_info: Dict[str, Any], visited: Optional[set] = None +) -> List[str]: + """Collect all nested type names from a GraphQL type definition.""" + if visited is None: + visited = set() + + nested_types = [] + input_fields = type_info.get("inputFields", []) + + for field in input_fields: + field_type = field.get("type", {}) + base_type_name = _extract_base_type_name(field_type) + + if base_type_name and base_type_name not in visited: + # Only collect custom/complex types (not built-in scalars) + if base_type_name not in ["String", "Int", "Float", "Boolean", "ID"]: + nested_types.append(base_type_name) + # Don't add to visited here - let _fetch_type_recursive handle that + + return nested_types + + +def _fetch_type_recursive( + client: Any, type_name: str, visited: Optional[set] = None +) -> Dict[str, Dict[str, Any]]: + """Recursively fetch a type and all its nested types.""" + if visited is None: + visited = set() + + if type_name in visited: + return {} + + visited.add(type_name) + types_map = {} + + # Fetch the current type + try: + targeted_query = f""" + query DescribeType {{ + __type(name: "{type_name}") {{ + name + kind + inputFields {{ + name + description + type {{ + name + kind + ofType {{ + name + kind + ofType {{ + name + kind + }} + }} + }} + }} + enumValues {{ + name + description + isDeprecated + deprecationReason + }} + }} + }} + """ + + type_result = client.execute_graphql(targeted_query) + type_info = type_result.get("__type") + + if type_info: + types_map[type_name] = type_info + + # Find nested types + nested_type_names = _collect_nested_types(type_info, visited) + logger.debug(f"Type '{type_name}' has nested types: {nested_type_names}") + + # Recursively fetch nested types + for 
nested_type_name in nested_type_names: + logger.debug(f"Recursively fetching nested type: {nested_type_name}") + nested_types = _fetch_type_recursive(client, nested_type_name, visited) + types_map.update(nested_types) + if nested_type_name in nested_types: + logger.debug(f"Successfully fetched type: {nested_type_name}") + else: + logger.debug(f"Failed to fetch type: {nested_type_name}") + + except Exception as e: + logger.debug(f"Failed to fetch type {type_name}: {e}") + + return types_map + + +def _format_single_type_fields( + type_info: Dict[str, Any], indent: str = " " +) -> List[str]: + """Format fields or enum values for a single type.""" + lines = [] + input_fields = type_info.get("inputFields", []) + enum_values = type_info.get("enumValues", []) + type_kind = type_info.get("kind", "") + + if input_fields: + for field in input_fields: + field_name = field.get("name", "unknown") + field_type = _format_graphql_type(field.get("type", {})) + field_desc = field.get("description", "") + if field_desc: + lines.append(f"{indent}{field_name}: {field_type} - {field_desc}") + else: + lines.append(f"{indent}{field_name}: {field_type}") + elif enum_values: + for enum_value in enum_values: + value_name = enum_value.get("name", "unknown") + value_desc = enum_value.get("description", "") + is_deprecated = enum_value.get("isDeprecated", False) + deprecation_reason = enum_value.get("deprecationReason", "") + + value_line = f"{indent}{value_name}" + if value_desc: + value_line += f" - {value_desc}" + if is_deprecated: + if deprecation_reason: + value_line += f" (DEPRECATED: {deprecation_reason})" + else: + value_line += " (DEPRECATED)" + lines.append(value_line) + else: + if type_kind == "ENUM": + lines.append(f"{indent}(no enum values)") + else: + lines.append(f"{indent}(no fields)") + + return lines + + +def _format_recursive_types( + types_map: Dict[str, Dict[str, Any]], root_type_name: str +) -> str: + """Format multiple types in a hierarchical display.""" + lines = [] + + # Display root type first + if root_type_name in types_map: + root_type = types_map[root_type_name] + lines.append(f"{root_type_name}:") + lines.extend(_format_single_type_fields(root_type)) + lines.append("") # Empty line after root type + + # Display nested types + for type_name, type_info in types_map.items(): + if type_name == root_type_name: + continue # Already displayed + + lines.append(f"{type_name}:") + lines.extend(_format_single_type_fields(type_info)) + lines.append("") # Empty line between types + + return "\n".join(lines).rstrip() + + +def _extract_base_type_name(type_info: Dict[str, Any]) -> Optional[str]: + """Extract the base type name from a GraphQL type (removing NON_NULL and LIST wrappers).""" + if not type_info: + return None + + kind = type_info.get("kind", "") + name = type_info.get("name") + of_type = type_info.get("ofType") + + if kind in ["NON_NULL", "LIST"] and of_type: + return _extract_base_type_name(of_type) + elif name: + return name + else: + return None + + +def _format_graphql_type(type_info: Dict[str, Any]) -> str: + """Format GraphQL type information for display.""" + kind = type_info.get("kind", "") + name = type_info.get("name") + of_type = type_info.get("ofType") + + if kind == "NON_NULL": + inner_type = _format_graphql_type(of_type) if of_type else "Unknown" + return f"{inner_type}!" 
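+    # Type wrappers nest recursively: for example NON_NULL(LIST(NON_NULL(String)))
+    # renders as [String!]!, matching the argument listings in the docs.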
+ elif kind == "LIST": + inner_type = _format_graphql_type(of_type) if of_type else "Unknown" + return f"[{inner_type}]" + elif name: + return name + else: + return "Unknown" + + +def _find_operation_by_name( + schema: Dict[str, Any], operation_name: str +) -> Optional[tuple[Dict[str, Any], str]]: + """Find an operation by name in queries or mutations.""" + # Search in queries + query_type = schema.get("queryType", {}) + if query_type: + for field in query_type.get("fields", []): + if field.get("name") == operation_name: + return field, "Query" + + # Search in mutations + mutation_type = schema.get("mutationType", {}) + if mutation_type: + for field in mutation_type.get("fields", []): + if field.get("name") == operation_name: + return field, "Mutation" + + return None + + +def _find_type_by_name(client: Any, type_name: str) -> Optional[Dict[str, Any]]: + """Find a type by name using GraphQL introspection.""" + try: + targeted_query = f""" + query DescribeType {{ + __type(name: "{type_name}") {{ + name + kind + inputFields {{ + name + description + type {{ + name + kind + ofType {{ + name + kind + ofType {{ + name + kind + }} + }} + }} + }} + enumValues {{ + name + description + isDeprecated + deprecationReason + }} + }} + }} + """ + + type_result = client.execute_graphql(targeted_query) + return type_result.get("__type") + + except Exception as e: + logger.debug(f"Failed to fetch type {type_name}: {e}") + return None + + +def _search_operation_and_type( + schema: Dict[str, Any], client: Any, name: str +) -> Tuple[Optional[Tuple[Dict[str, Any], str]], Optional[Dict[str, Any]]]: + """Search for both operation and type with the given name.""" + operation_info = _find_operation_by_name(schema, name) + type_info = _find_type_by_name(client, name) + return operation_info, type_info + + +def _convert_type_to_json(type_info: Dict[str, Any]) -> Dict[str, Any]: + """Convert GraphQL type info to LLM-friendly JSON format.""" + if not type_info: + return {} + + kind = type_info.get("kind", "") + name = type_info.get("name") + of_type = type_info.get("ofType") + + result = {"kind": kind} + + if name: + result["name"] = name + + if kind in ["NON_NULL", "LIST"] and of_type: + result["ofType"] = _convert_type_to_json(of_type) + elif kind == "NON_NULL": + result["nonNull"] = True + elif kind == "LIST": + result["list"] = True + + return result + + +def _convert_operation_to_json( + operation: Dict[str, Any], operation_type: str +) -> Dict[str, Any]: + """Convert operation info to LLM-friendly JSON format.""" + result = { + "name": operation.get("name", ""), + "type": operation_type, + "description": operation.get("description", ""), + "arguments": [], + } + + for arg in operation.get("args", []): + arg_json = { + "name": arg.get("name", ""), + "type": _convert_type_to_json(arg.get("type", {})), + "description": arg.get("description", ""), + } + + # Determine if required based on NON_NULL wrapper + arg_type = arg.get("type", {}) + arg_json["required"] = arg_type.get("kind") == "NON_NULL" + + result["arguments"].append(arg_json) + + return result + + +def _convert_type_details_to_json(type_info: Dict[str, Any]) -> Dict[str, Any]: + """Convert type details to LLM-friendly JSON format.""" + result = { + "name": type_info.get("name", ""), + "kind": type_info.get("kind", ""), + "description": type_info.get("description", ""), + } + + # Handle input fields for INPUT_OBJECT types + input_fields = type_info.get("inputFields", []) + if input_fields: + result["fields"] = [] + for field in input_fields: + field_json = 
{ + "name": field.get("name", ""), + "type": _convert_type_to_json(field.get("type", {})), + "description": field.get("description", ""), + } + result["fields"].append(field_json) + + # Handle enum values for ENUM types + enum_values = type_info.get("enumValues", []) + if enum_values: + result["values"] = [] + for enum_value in enum_values: + value_json = { + "name": enum_value.get("name", ""), + "description": enum_value.get("description", ""), + "deprecated": enum_value.get("isDeprecated", False), + } + if enum_value.get("deprecationReason"): + value_json["deprecationReason"] = enum_value.get("deprecationReason") + result["values"].append(value_json) + + return result + + +def _convert_operations_list_to_json(schema: Dict[str, Any]) -> Dict[str, Any]: + """Convert operations list to LLM-friendly JSON format.""" + result: Dict[str, Any] = {"schema": {"queries": [], "mutations": []}} + + # Convert queries + query_type = schema.get("queryType", {}) + if query_type: + for field in query_type.get("fields", []): + result["schema"]["queries"].append( + _convert_operation_to_json(field, "Query") + ) + + # Convert mutations + mutation_type = schema.get("mutationType", {}) + if mutation_type: + for field in mutation_type.get("fields", []): + result["schema"]["mutations"].append( + _convert_operation_to_json(field, "Mutation") + ) + + return result + + +def _convert_describe_to_json( + operation_info: Optional[tuple[Dict[str, Any], str]], + type_info: Optional[Dict[str, Any]], + types_map: Optional[Dict[str, Dict[str, Any]]] = None, +) -> Dict[str, Any]: + """Convert describe output to LLM-friendly JSON format.""" + result = {} + + if operation_info: + operation_details, operation_type = operation_info + result["operation"] = _convert_operation_to_json( + operation_details, operation_type + ) + + if type_info: + result["type"] = _convert_type_details_to_json(type_info) + + if types_map: + result["relatedTypes"] = {} + for type_name, type_data in types_map.items(): + result["relatedTypes"][type_name] = _convert_type_details_to_json(type_data) + + return result + + +def _dict_to_graphql_input(obj: Dict[str, Any]) -> str: + """Convert a Python dict to GraphQL input syntax.""" + if not isinstance(obj, dict): + return str(obj) + + items = [] + for key, value in obj.items(): + if isinstance(value, str): + items.append(f'{key}: "{value}"') + elif isinstance(value, dict): + items.append(f"{key}: {_dict_to_graphql_input(value)}") + elif isinstance(value, list): + list_items = [] + for item in value: + if isinstance(item, str): + list_items.append(f'"{item}"') + elif isinstance(item, dict): + list_items.append(_dict_to_graphql_input(item)) + else: + list_items.append(str(item)) + items.append(f"{key}: [{', '.join(list_items)}]") + elif isinstance(value, bool): + items.append(f"{key}: {str(value).lower()}") + else: + items.append(f"{key}: {value}") + + return "{" + ", ".join(items) + "}" + + +def _generate_operation_query( + operation_field: Dict[str, Any], + operation_type: str, + variables: Optional[Dict[str, Any]] = None, +) -> str: + """Generate a GraphQL query string from an operation field definition.""" + operation_name = operation_field.get("name", "unknown") + args = operation_field.get("args", []) + + # Build arguments string + args_string = "" + if args: + # Check for required arguments + required_args = [] + optional_args = [] + + for arg in args: + arg_name = arg.get("name") + arg_type = arg.get("type", {}) + if arg_type.get("kind") == "NON_NULL": + required_args.append(arg_name) + else: + 
optional_args.append(arg_name) + + if variables: + # Build arguments from provided variables + valid_args = [] + for arg in args: + arg_name = arg.get("name") + if arg_name and arg_name in variables: + # Use inline value instead of variable syntax for simplicity + value = variables[arg_name] + if isinstance(value, str): + # Handle string values with quotes + formatted_value = f'"{value}"' + elif isinstance(value, dict): + # Handle object/input types - convert to GraphQL syntax + formatted_value = _dict_to_graphql_input(value) + else: + # Handle numbers, booleans, etc. + formatted_value = ( + str(value).lower() + if isinstance(value, bool) + else str(value) + ) + + valid_args.append(f"{arg_name}: {formatted_value}") + + if valid_args: + args_string = f"({', '.join(valid_args)})" + + # Check if all required arguments are provided + if required_args: + missing_required = [ + arg for arg in required_args if not variables or arg not in variables + ] + if missing_required: + raise click.ClickException( + f"Operation '{operation_name}' requires arguments: {', '.join(missing_required)}. " + f'Provide them using --variables \'{{"{missing_required[0]}": "value", ...}}\'' + ) + + # Generate basic field selection based on common patterns + if operation_name == "me": + # Special case for 'me' query - we know it returns AuthenticatedUser + field_selection = "{ corpUser { urn username properties { displayName email firstName lastName title } } }" + elif operation_name.startswith("list"): + # List operations typically return paginated results + entity_name = operation_name.replace("list", "").lower() + if entity_name == "users": + field_selection = ( + "{ total users { urn username properties { displayName email } } }" + ) + else: + # Generic list response + field_selection = "{ total }" + elif operation_name in ["corpUser", "dataset", "dashboard", "chart"]: + # Entity queries typically return the entity with basic fields + field_selection = "{ urn }" + else: + # Default minimal selection + field_selection = "" + + # Construct the query + operation_keyword = operation_type.lower() + query = f"{operation_keyword} {{ {operation_name}{args_string} {field_selection} }}" + + return query + + +def _get_schema_via_introspection(client: Any) -> Dict[str, Any]: + """Get GraphQL schema via introspection only (no fallback for explicit requests).""" + try: + # Make two separate requests to avoid "bad faith" introspection protection + query_result = client.execute_graphql(QUERY_INTROSPECTION) + mutation_result = client.execute_graphql(MUTATION_INTROSPECTION) + + # Combine results + schema = {} + if query_result and "__schema" in query_result: + schema.update(query_result["__schema"]) + if mutation_result and "__schema" in mutation_result: + schema.update(mutation_result["__schema"]) + + logger.debug("Successfully fetched schema via introspection") + return schema + except Exception as e: + logger.error(f"GraphQL introspection failed: {e}") + logger.error("Cannot perform introspection. Please ensure:") + logger.error("1. DataHub GMS is running and accessible") + logger.error("2. Network connectivity allows GraphQL requests") + logger.error("3. Authentication credentials are valid") + raise click.ClickException( + f"Schema introspection failed: {e}. Cannot retrieve live schema information." 
+ ) from e + + +def _handle_list_operations( + schema: Dict[str, Any], + format: str, + pretty: bool, +) -> None: + """Handle --list-operations and combined --list-queries --list-mutations.""" + if format == "json": + json_output = _convert_operations_list_to_json(schema) + click.echo( + json.dumps(json_output, indent=2 if pretty else None, sort_keys=True) + ) + else: + query_fields = ( + schema.get("queryType", {}).get("fields", []) + if schema.get("queryType") + else [] + ) + mutation_fields = ( + schema.get("mutationType", {}).get("fields", []) + if schema.get("mutationType") + else [] + ) + + output = [] + if query_fields: + output.append(_format_operation_list(query_fields, "Queries")) + if mutation_fields: + output.append(_format_operation_list(mutation_fields, "Mutations")) + + click.echo("\n\n".join(output)) + + +def _handle_list_queries( + schema: Dict[str, Any], + format: str, + pretty: bool, +) -> None: + """Handle --list-queries only.""" + if format == "json": + query_type = schema.get("queryType", {}) + json_output: Dict[str, Any] = { + "schema": { + "queries": [ + _convert_operation_to_json(field, "Query") + for field in query_type.get("fields", []) + ] + } + } + click.echo( + json.dumps(json_output, indent=2 if pretty else None, sort_keys=True) + ) + else: + query_fields = ( + schema.get("queryType", {}).get("fields", []) + if schema.get("queryType") + else [] + ) + click.echo(_format_operation_list(query_fields, "Queries")) + + +def _handle_list_mutations( + schema: Dict[str, Any], + format: str, + pretty: bool, +) -> None: + """Handle --list-mutations only.""" + if format == "json": + mutation_type = schema.get("mutationType", {}) + json_output: Dict[str, Any] = { + "schema": { + "mutations": [ + _convert_operation_to_json(field, "Mutation") + for field in mutation_type.get("fields", []) + ] + } + } + click.echo( + json.dumps(json_output, indent=2 if pretty else None, sort_keys=True) + ) + else: + mutation_fields = ( + schema.get("mutationType", {}).get("fields", []) + if schema.get("mutationType") + else [] + ) + click.echo(_format_operation_list(mutation_fields, "Mutations")) + + +def _get_recursive_types_for_describe( + client: Any, + operation_info: Optional[Tuple[Dict[str, Any], str]], + type_info: Optional[Dict[str, Any]], + describe: str, +) -> Optional[Dict[str, Any]]: + """Get recursive types for describe functionality.""" + types_map = None + try: + if operation_info: + # Collect input types from operation arguments + operation_details, _ = operation_info + all_types = set() + for arg in operation_details.get("args", []): + base_type_name = _extract_base_type_name(arg.get("type", {})) + if base_type_name and base_type_name not in [ + "String", + "Int", + "Float", + "Boolean", + "ID", + ]: + all_types.add(base_type_name) + + # Fetch all related types recursively + all_related_types = {} + for type_name in all_types: + try: + related_types = _fetch_type_recursive(client, type_name) + all_related_types.update(related_types) + except Exception as e: + logger.debug( + f"Failed to fetch recursive types for {type_name}: {e}" + ) + types_map = all_related_types + + elif type_info: + # Fetch recursive types starting from the type itself + try: + types_map = _fetch_type_recursive(client, describe) + except Exception as e: + logger.debug(f"Recursive type fetching failed: {e}") + types_map = None + except Exception as e: + logger.debug(f"Recursive exploration failed: {e}") + types_map = None + + return types_map + + +def _handle_describe_json_output( + operation_info: 
Optional[Tuple[Dict[str, Any], str]], + type_info: Optional[Dict[str, Any]], + types_map: Optional[Dict[str, Dict[str, Any]]], + describe: str, + recurse: bool, + pretty: bool, +) -> None: + """Handle JSON output for describe functionality.""" + json_output = _convert_describe_to_json(operation_info, type_info, types_map) + + # Add metadata + json_output["meta"] = {"query": describe, "recursive": recurse} + + click.echo(json.dumps(json_output, indent=2 if pretty else None, sort_keys=True)) + + +def _handle_describe_human_output( + schema: Dict[str, Any], + client: Any, + operation_info: Optional[Tuple[Dict[str, Any], str]], + type_info: Optional[Dict[str, Any]], + describe: str, + recurse: bool, +) -> None: + """Handle human-readable output for describe functionality.""" + output_sections = [] + + # Show operation details if found + if operation_info: + operation_details, operation_type = operation_info + + if recurse: + try: + operation_output = _format_operation_details_recursive( + operation_details, operation_type, client + ) + except Exception as e: + logger.debug( + f"Recursive operation details failed ({e}), falling back to standard format" + ) + operation_output = _format_operation_details( + operation_details, operation_type, schema + ) + else: + operation_output = _format_operation_details( + operation_details, operation_type, schema + ) + + output_sections.append(f"=== OPERATION ===\n{operation_output}") + + # Show type details if found + if type_info: + if recurse: + try: + types_map = _fetch_type_recursive(client, describe) + if types_map and describe in types_map: + type_output = _format_recursive_types(types_map, describe) + else: + type_output = _format_type_details(type_info) + except Exception as e: + logger.debug( + f"Recursive type details failed ({e}), falling back to standard format" + ) + type_output = _format_type_details(type_info) + else: + type_output = _format_type_details(type_info) + + output_sections.append(f"=== TYPE ===\n{type_output}") + + # Output results + if len(output_sections) > 1: + # Both operation and type found - show both with separators + click.echo("\n\n".join(output_sections)) + else: + # Only one found - show without section header + output = output_sections[0] + # Remove the section header + if output.startswith("=== OPERATION ===\n"): + output = output[len("=== OPERATION ===\n") :] + elif output.startswith("=== TYPE ===\n"): + output = output[len("=== TYPE ===\n") :] + click.echo(output) + + +def _handle_describe( + schema: Dict[str, Any], + client: Any, + describe: str, + recurse: bool, + format: str, + pretty: bool, +) -> None: + """Handle --describe operation/type.""" + operation_info, type_info = _search_operation_and_type(schema, client, describe) + + if not operation_info and not type_info: + raise click.ClickException( + f"'{describe}' not found as an operation or type. Use --list-operations to see available operations or try a specific type name." 
+ ) + + if format == "json": + types_map = None + if recurse: + types_map = _get_recursive_types_for_describe( + client, operation_info, type_info, describe + ) + + _handle_describe_json_output( + operation_info, + type_info, + types_map, + describe, + recurse, + pretty, + ) + else: + _handle_describe_human_output( + schema, + client, + operation_info, + type_info, + describe, + recurse, + ) + + +def _execute_operation( + client: Any, operation: str, variables: Optional[str], schema_path: Optional[str] +) -> Dict[str, Any]: + """Execute a named GraphQL operation.""" + if schema_path: + schema = _parse_graphql_operations_from_files(schema_path) + else: + schema = _get_schema_via_introspection(client) + + # Find the operation + operation_info = _find_operation_by_name(schema, operation) + if not operation_info: + raise click.ClickException( + f"Operation '{operation}' not found. Use --list-operations to see available operations." + ) + + operation_field, operation_type = operation_info + variables_dict = _parse_variables(variables) + + try: + # Generate the GraphQL query from the operation + generated_query = _generate_operation_query( + operation_field, operation_type, variables_dict + ) + logger.debug(f"Generated query for operation '{operation}': {generated_query}") + + # Execute the generated query + return client.execute_graphql(query=generated_query, variables=variables_dict) + except Exception as e: + raise click.ClickException( + f"Failed to execute operation '{operation}': {e}" + ) from e + + +def _execute_query(client: Any, query: str, variables: Optional[str]) -> Dict[str, Any]: + """Execute a raw GraphQL query.""" + query_content = _load_content_or_file(query) + variables_dict = _parse_variables(variables) + + try: + return client.execute_graphql(query=query_content, variables=variables_dict) + except Exception as e: + raise click.ClickException(f"Failed to execute GraphQL query: {e}") from e + + +@click.command() +@click.option( + "--query", + "-q", + help="GraphQL query string or path to .graphql file", +) +@click.option( + "--variables", + "-v", + help="GraphQL variables as JSON string or path to .json file", +) +@click.option( + "--operation", + "-o", + help="Execute a named GraphQL operation from the schema", +) +@click.option( + "--list-operations", + is_flag=True, + help="List all available GraphQL operations (queries and mutations)", +) +@click.option( + "--list-queries", + is_flag=True, + help="List available GraphQL queries", +) +@click.option( + "--list-mutations", + is_flag=True, + help="List available GraphQL mutations", +) +@click.option( + "--describe", + "-d", + help="Describe a specific GraphQL operation", +) +@click.option( + "--recurse", + is_flag=True, + help="Recursively describe nested types when using --describe", +) +@click.option( + "--schema-path", + help="Path to GraphQL schema files directory (uses local files instead of live introspection)", +) +@click.option( + "--no-pretty", + is_flag=True, + help="Disable pretty-printing of JSON output", +) +@click.option( + "--format", + type=click.Choice(["human", "json"]), + default="human", + help="Output format: human-readable or JSON for LLM consumption", +) +@upgrade.check_upgrade +def graphql( + query: Optional[str], + variables: Optional[str], + operation: Optional[str], + list_operations: bool, + list_queries: bool, + list_mutations: bool, + describe: Optional[str], + recurse: bool, + schema_path: Optional[str], + no_pretty: bool, + format: str, +) -> None: + """Execute GraphQL queries and mutations 
against DataHub.""" + + pretty = not no_pretty + client = get_default_graph(ClientMode.CLI) + + # Schema introspection commands + if list_operations or list_queries or list_mutations or describe: + if schema_path: + schema = _parse_graphql_operations_from_files(schema_path) + else: + schema = _get_schema_via_introspection(client) + + if list_operations or (list_queries and list_mutations): + _handle_list_operations(schema, format, pretty) + return + elif list_queries: + _handle_list_queries(schema, format, pretty) + return + elif list_mutations: + _handle_list_mutations(schema, format, pretty) + return + elif describe: + _handle_describe( + schema, + client, + describe, + recurse, + format, + pretty, + ) + return + + # Execution commands + if operation: + result = _execute_operation(client, operation, variables, schema_path) + elif query: + result = _execute_query(client, query, variables) + else: + raise click.ClickException( + "Must specify either --query, --operation, or a discovery option " + "(--list-operations, --list-queries, --list-mutations, --describe)" + ) + + # Output result + if pretty: + click.echo(json.dumps(result, indent=2, sort_keys=True)) + else: + click.echo(json.dumps(result)) + + +if __name__ == "__main__": + graphql() diff --git a/metadata-ingestion/src/datahub/entrypoints.py b/metadata-ingestion/src/datahub/entrypoints.py index 77fd3c9574..8481e9b025 100644 --- a/metadata-ingestion/src/datahub/entrypoints.py +++ b/metadata-ingestion/src/datahub/entrypoints.py @@ -22,6 +22,7 @@ from datahub.cli.docker_cli import docker from datahub.cli.env_utils import get_boolean_env_variable from datahub.cli.exists_cli import exists from datahub.cli.get_cli import get +from datahub.cli.graphql_cli import graphql from datahub.cli.ingest_cli import ingest from datahub.cli.migrate import migrate from datahub.cli.put_cli import put @@ -169,6 +170,7 @@ datahub.add_command(ingest) datahub.add_command(delete) datahub.add_command(exists) datahub.add_command(get) +datahub.add_command(graphql) datahub.add_command(put) datahub.add_command(state) datahub.add_command(telemetry_cli) diff --git a/metadata-ingestion/tests/unit/cli/test_graphql_cli.py b/metadata-ingestion/tests/unit/cli/test_graphql_cli.py new file mode 100644 index 0000000000..5390304865 --- /dev/null +++ b/metadata-ingestion/tests/unit/cli/test_graphql_cli.py @@ -0,0 +1,2451 @@ +import json +import os +import tempfile +from pathlib import Path +from typing import Any +from unittest.mock import Mock, patch + +import click +import pytest +from click.testing import CliRunner + +from datahub.cli.graphql_cli import ( + _collect_nested_types, + _convert_describe_to_json, + _convert_operation_to_json, + _convert_operations_list_to_json, + _convert_type_details_to_json, + _convert_type_to_json, + _dict_to_graphql_input, + _extract_base_type_name, + _fetch_type_recursive, + _find_operation_by_name, + _find_type_by_name, + _format_graphql_type, + _format_operation_details, + _format_operation_list, + _format_recursive_types, + _format_single_type_fields, + _generate_operation_query, + _is_file_path, + _load_content_or_file, + _parse_graphql_operations_from_files, + _parse_operations_from_content, + _parse_variables, + graphql, +) + + +class TestHelperFunctions: + """Test helper functions in graphql_cli module.""" + + def test_is_file_path_with_existing_file(self): + """Test that _is_file_path returns True for existing files.""" + with tempfile.NamedTemporaryFile(suffix=".graphql", delete=False) as tmp: + tmp.write(b"query { me { username } 
}") + tmp.flush() + + assert _is_file_path(tmp.name) + assert _is_file_path("./test.graphql") is False # doesn't exist + + # Clean up + Path(tmp.name).unlink() + + def test_is_file_path_with_non_existing_file(self): + """Test that _is_file_path returns False for non-existing files.""" + assert _is_file_path("./non-existent.graphql") is False + assert _is_file_path("/path/to/nowhere.json") is False + assert _is_file_path("query { me }") is False + + def test_is_file_path_with_short_strings(self): + """Test that _is_file_path handles short strings correctly.""" + assert _is_file_path("") is False + assert _is_file_path("a") is False + assert _is_file_path("ab") is False + + def test_is_file_path_with_relative_paths(self): + """Test that _is_file_path handles relative paths correctly.""" + import os + import tempfile + + # Create a temporary directory and file for testing + with tempfile.TemporaryDirectory() as temp_dir: + # Create a test file in the temp directory + test_file = Path(temp_dir) / "test.graphql" + test_file.write_text("query { me { username } }") + + # Change to the temp directory to test relative paths + original_cwd = os.getcwd() + try: + os.chdir(temp_dir) + + # Test simple filename (exists in current directory) + assert _is_file_path("test.graphql") is True + assert _is_file_path("nonexistent.graphql") is False + + # Create a subdirectory for testing relative paths + sub_dir = Path(temp_dir) / "subdir" + sub_dir.mkdir() + sub_file = sub_dir / "sub.graphql" + sub_file.write_text("query { search }") + + # Test relative path with ./ + assert _is_file_path("./test.graphql") is True + assert _is_file_path("./subdir/sub.graphql") is True + assert _is_file_path("./nonexistent.graphql") is False + + # Change to subdirectory to test ../ + os.chdir(sub_dir) + assert _is_file_path("../test.graphql") is True + assert _is_file_path("../nonexistent.graphql") is False + + finally: + os.chdir(original_cwd) + + def test_is_file_path_with_absolute_paths(self): + """Test that _is_file_path handles absolute paths correctly.""" + with tempfile.NamedTemporaryFile(suffix=".graphql", delete=False) as tmp: + tmp.write(b"query { me { username } }") + tmp.flush() + + # Test absolute path + assert _is_file_path(tmp.name) is True + + # Clean up + Path(tmp.name).unlink() + + # Test non-existent absolute path + assert _is_file_path(tmp.name) is False + + def test_is_file_path_with_json_files(self): + """Test that _is_file_path works with JSON files.""" + import os + import tempfile + + with tempfile.TemporaryDirectory() as temp_dir: + test_file = Path(temp_dir) / "variables.json" + test_file.write_text('{"key": "value"}') + + original_cwd = os.getcwd() + try: + os.chdir(temp_dir) + assert _is_file_path("variables.json") is True + assert _is_file_path("./variables.json") is True + + finally: + os.chdir(original_cwd) + + def test_is_file_path_with_graphql_content(self): + """Test that _is_file_path correctly identifies GraphQL content vs file paths.""" + # These should be identified as GraphQL content, not file paths + graphql_queries = [ + "query { me { username } }", + "mutation { deleteEntity(urn: $urn) }", + "query GetUser($urn: String!) 
{ corpUser(urn: $urn) { info { email } } }", + '{ search(input: { type: TAG, query: "*" }) { total } }', + ] + + for query in graphql_queries: + assert _is_file_path(query) is False + + def test_load_content_or_file_with_file(self): + """Test loading content from a file.""" + content = "query { me { username } }" + with tempfile.NamedTemporaryFile( + mode="w", suffix=".graphql", delete=False + ) as tmp: + tmp.write(content) + tmp.flush() + + result = _load_content_or_file(tmp.name) + assert result == content + + # Clean up + Path(tmp.name).unlink() + + def test_load_content_or_file_with_literal(self): + """Test that literal content is returned as-is.""" + content = "query { me { username } }" + result = _load_content_or_file(content) + assert result == content + + def test_load_content_or_file_with_relative_paths(self): + """Test loading content from files using relative paths.""" + import os + import tempfile + + content1 = "query { me { username } }" + content2 = "query { search(input: { type: TAG }) { total } }" + + with tempfile.TemporaryDirectory() as temp_dir: + # Create test files + test_file = Path(temp_dir) / "test.graphql" + test_file.write_text(content1) + + sub_dir = Path(temp_dir) / "subdir" + sub_dir.mkdir() + sub_file = sub_dir / "sub.graphql" + sub_file.write_text(content2) + + original_cwd = os.getcwd() + try: + os.chdir(temp_dir) + + # Test simple filename + result = _load_content_or_file("test.graphql") + assert result == content1 + + # Test relative path with ./ + result = _load_content_or_file("./test.graphql") + assert result == content1 + + result = _load_content_or_file("./subdir/sub.graphql") + assert result == content2 + + # Change to subdirectory to test ../ + os.chdir(sub_dir) + result = _load_content_or_file("../test.graphql") + assert result == content1 + + finally: + os.chdir(original_cwd) + + def test_load_content_or_file_with_absolute_paths(self): + """Test loading content from files using absolute paths.""" + content = "query { me { username } }" + + with tempfile.NamedTemporaryFile( + mode="w", suffix=".graphql", delete=False + ) as tmp: + tmp.write(content) + tmp.flush() + + # Test absolute path + result = _load_content_or_file(tmp.name) + assert result == content + + # Clean up + Path(tmp.name).unlink() + + def test_load_content_or_file_error_handling(self): + """Test error handling when file path looks like a file but doesn't exist.""" + import os + import tempfile + + with tempfile.TemporaryDirectory() as temp_dir: + original_cwd = os.getcwd() + try: + os.chdir(temp_dir) + + # Files that don't exist should be treated as literal content, not files + # This is the expected behavior based on how _is_file_path works + result = _load_content_or_file("nonexistent.graphql") + assert result == "nonexistent.graphql" + + result = _load_content_or_file("../nonexistent.graphql") + assert result == "../nonexistent.graphql" + + finally: + os.chdir(original_cwd) + + def test_parse_variables_with_valid_json(self): + """Test parsing valid JSON variables.""" + variables_str = '{"key": "value", "number": 42}' + result = _parse_variables(variables_str) + assert result == {"key": "value", "number": 42} + + def test_parse_variables_with_none(self): + """Test parsing None variables.""" + assert _parse_variables(None) is None + assert _parse_variables("") is None + + def test_parse_variables_with_invalid_json(self): + """Test parsing invalid JSON raises ClickException.""" + from click import ClickException + + with pytest.raises(ClickException, match="Invalid JSON in 
variables"): + _parse_variables('{"invalid": json}') + + def test_parse_variables_from_file(self): + """Test parsing variables from a JSON file.""" + variables = {"key": "value", "number": 42} + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp: + json.dump(variables, tmp) + tmp.flush() + + result = _parse_variables(tmp.name) + assert result == variables + + # Clean up + Path(tmp.name).unlink() + + def test_format_graphql_type_simple(self): + """Test formatting simple GraphQL types.""" + type_info = {"kind": "SCALAR", "name": "String"} + assert _format_graphql_type(type_info) == "String" + + def test_format_graphql_type_non_null(self): + """Test formatting non-null GraphQL types.""" + type_info = {"kind": "NON_NULL", "ofType": {"kind": "SCALAR", "name": "String"}} + assert _format_graphql_type(type_info) == "String!" + + def test_format_graphql_type_list(self): + """Test formatting list GraphQL types.""" + type_info = {"kind": "LIST", "ofType": {"kind": "SCALAR", "name": "String"}} + assert _format_graphql_type(type_info) == "[String]" + + def test_format_graphql_type_complex(self): + """Test formatting complex GraphQL types.""" + type_info = { + "kind": "NON_NULL", + "ofType": {"kind": "LIST", "ofType": {"kind": "SCALAR", "name": "String"}}, + } + assert _format_graphql_type(type_info) == "[String]!" + + def test_format_operation_list_empty(self): + """Test formatting empty operation list.""" + result = _format_operation_list([], "Query") + assert result == "No query operations found." + + def test_format_operation_list_with_operations(self): + """Test formatting operation list with operations.""" + operations = [ + {"name": "me", "description": "Get current user"}, + {"name": "search", "description": "Search entities"}, + ] + result = _format_operation_list(operations, "Query") + expected = "Query:\n - me: Get current user\n - search: Search entities" + assert result == expected + + def test_format_operation_list_without_descriptions(self): + """Test formatting operation list without descriptions.""" + operations = [{"name": "me"}, {"name": "search", "description": ""}] + result = _format_operation_list(operations, "Query") + expected = "Query:\n - me\n - search" + assert result == expected + + def test_format_operation_details(self): + """Test formatting operation details.""" + operation = { + "name": "searchAcrossEntities", + "description": "Search across all entity types", + "args": [ + { + "name": "input", + "type": { + "kind": "NON_NULL", + "ofType": {"kind": "INPUT_OBJECT", "name": "SearchInput"}, + }, + } + ], + } + result = _format_operation_details(operation, "Query") + expected = ( + "Operation: searchAcrossEntities\n" + "Type: Query\n" + "Description: Search across all entity types\n" + "Arguments:\n" + " - input: SearchInput!" 
+ ) + assert result == expected + + def test_format_operation_details_no_args(self): + """Test formatting operation details without arguments.""" + operation = {"name": "me", "description": "Get current user", "args": []} + result = _format_operation_details(operation, "Query") + expected = ( + "Operation: me\nType: Query\nDescription: Get current user\nArguments: None" + ) + assert result == expected + + def test_find_operation_by_name_in_queries(self): + """Test finding operation in queries.""" + schema = { + "queryType": { + "fields": [ + {"name": "me", "description": "Get current user"}, + {"name": "search", "description": "Search entities"}, + ] + }, + "mutationType": { + "fields": [{"name": "deleteEntity", "description": "Delete entity"}] + }, + } + + result = _find_operation_by_name(schema, "me") + assert result is not None + operation, operation_type = result + assert operation["name"] == "me" + assert operation_type == "Query" + + def test_find_operation_by_name_in_mutations(self): + """Test finding operation in mutations.""" + schema = { + "queryType": { + "fields": [{"name": "me", "description": "Get current user"}] + }, + "mutationType": { + "fields": [{"name": "deleteEntity", "description": "Delete entity"}] + }, + } + + result = _find_operation_by_name(schema, "deleteEntity") + assert result is not None + operation, operation_type = result + assert operation["name"] == "deleteEntity" + assert operation_type == "Mutation" + + def test_find_operation_by_name_not_found(self): + """Test finding non-existent operation.""" + schema = { + "queryType": {"fields": [{"name": "me", "description": "Get current user"}]} + } + + result = _find_operation_by_name(schema, "nonExistent") + assert result is None + + +class TestGraphQLCommand: + """Test the main GraphQL CLI command.""" + + def setup_method(self): + """Set up test environment.""" + self.runner = CliRunner() + + @patch("datahub.cli.graphql_cli.get_default_graph") + def test_graphql_raw_query(self, mock_get_graph): + """Test executing raw GraphQL query.""" + mock_client = Mock() + mock_client.execute_graphql.return_value = {"me": {"username": "testuser"}} + mock_get_graph.return_value = mock_client + + result = self.runner.invoke(graphql, ["--query", "query { me { username } }"]) + + assert result.exit_code == 0 + mock_client.execute_graphql.assert_called_once_with( + query="query { me { username } }", variables=None + ) + assert '"me"' in result.output + assert '"username": "testuser"' in result.output + + @patch("datahub.cli.graphql_cli.get_default_graph") + def test_graphql_query_with_variables(self, mock_get_graph): + """Test executing GraphQL query with variables.""" + mock_client = Mock() + mock_client.execute_graphql.return_value = { + "corpUser": {"info": {"email": "test@example.com"}} + } + mock_get_graph.return_value = mock_client + + result = self.runner.invoke( + graphql, + [ + "--query", + "query GetUser($urn: String!) { corpUser(urn: $urn) { info { email } } }", + "--variables", + '{"urn": "urn:li:corpuser:test"}', + ], + ) + + assert result.exit_code == 0 + mock_client.execute_graphql.assert_called_once_with( + query="query GetUser($urn: String!) 
{ corpUser(urn: $urn) { info { email } } }", + variables={"urn": "urn:li:corpuser:test"}, + ) + + @patch("datahub.cli.graphql_cli.get_default_graph") + def test_graphql_list_operations(self, mock_get_graph): + """Test listing GraphQL operations.""" + mock_client = Mock() + mock_client.execute_graphql.return_value = { + "__schema": { + "queryType": { + "fields": [ + {"name": "me", "description": "Get current user"}, + {"name": "search", "description": "Search entities"}, + ] + }, + "mutationType": { + "fields": [{"name": "deleteEntity", "description": "Delete entity"}] + }, + } + } + mock_get_graph.return_value = mock_client + + result = self.runner.invoke(graphql, ["--list-operations"]) + + assert result.exit_code == 0 + assert "Queries:" in result.output + assert "me: Get current user" in result.output + assert "Mutations:" in result.output + assert "deleteEntity: Delete entity" in result.output + + @patch("datahub.cli.graphql_cli.get_default_graph") + def test_graphql_describe_operation(self, mock_get_graph): + """Test describing a GraphQL operation.""" + mock_client = Mock() + mock_client.execute_graphql.return_value = { + "__schema": { + "queryType": { + "fields": [ + { + "name": "searchAcrossEntities", + "description": "Search across all entity types", + "args": [ + { + "name": "input", + "type": { + "kind": "NON_NULL", + "ofType": { + "kind": "INPUT_OBJECT", + "name": "SearchInput", + }, + }, + } + ], + } + ] + } + } + } + mock_get_graph.return_value = mock_client + + result = self.runner.invoke(graphql, ["--describe", "searchAcrossEntities"]) + + assert result.exit_code == 0 + assert "Operation: searchAcrossEntities" in result.output + assert "Type: Query" in result.output + assert "Description: Search across all entity types" in result.output + assert "input: SearchInput!" 
in result.output + + @patch("datahub.cli.graphql_cli.get_default_graph") + def test_graphql_no_arguments(self, mock_get_graph): + """Test GraphQL command with no arguments.""" + # Mock is needed even for argument validation to avoid config errors + mock_client = Mock() + mock_get_graph.return_value = mock_client + + result = self.runner.invoke(graphql, []) + + assert result.exit_code != 0 + assert ( + "Must specify either --query, --operation, or a discovery option" + in result.output + ) + + @patch("datahub.cli.graphql_cli._get_schema_via_introspection") + @patch("datahub.cli.graphql_cli.get_default_graph") + def test_graphql_operation_execution_with_mock_error( + self, mock_get_graph, mock_schema + ): + """Test that operation-based execution works but fails with mock serialization error.""" + mock_client = Mock() + # Mock schema introspection to return a valid schema + mock_schema.return_value = { + "queryType": { + "fields": [ + { + "name": "searchAcrossEntities", + "args": [{"name": "input", "type": {"kind": "NON_NULL"}}], + } + ] + } + } + # Mock the execute_graphql to raise a JSON serialization error like in real scenario + mock_client.execute_graphql.side_effect = TypeError( + "Object of type Mock is not JSON serializable" + ) + mock_get_graph.return_value = mock_client + + result = self.runner.invoke( + graphql, + [ + "--operation", + "searchAcrossEntities", + "--variables", + '{"input": {"query": "test"}}', + ], + ) + + assert result.exit_code != 0 + assert "Failed to execute operation 'searchAcrossEntities'" in result.output + + @patch("datahub.cli.graphql_cli.get_default_graph") + def test_graphql_execution_error(self, mock_get_graph): + """Test handling GraphQL execution errors.""" + mock_client = Mock() + mock_client.execute_graphql.side_effect = Exception("GraphQL error") + mock_get_graph.return_value = mock_client + + result = self.runner.invoke(graphql, ["--query", "query { invalidField }"]) + + assert result.exit_code != 0 + assert "Failed to execute GraphQL query: GraphQL error" in result.output + + @patch("datahub.cli.graphql_cli.get_default_graph") + def test_graphql_no_pretty_output(self, mock_get_graph): + """Test GraphQL output without pretty printing.""" + mock_client = Mock() + mock_client.execute_graphql.return_value = {"me": {"username": "testuser"}} + mock_get_graph.return_value = mock_client + + result = self.runner.invoke( + graphql, ["--query", "query { me { username } }", "--no-pretty"] + ) + + assert result.exit_code == 0 + # Output should be compact JSON without indentation + assert '{"me": {"username": "testuser"}}' in result.output + + def test_graphql_query_from_file(self): + """Test loading GraphQL query from file.""" + query_content = "query { me { username } }" + + with tempfile.NamedTemporaryFile( + mode="w", suffix=".graphql", delete=False + ) as tmp: + tmp.write(query_content) + tmp.flush() + + with patch("datahub.cli.graphql_cli.get_default_graph") as mock_get_graph: + mock_client = Mock() + mock_client.execute_graphql.return_value = { + "me": {"username": "testuser"} + } + mock_get_graph.return_value = mock_client + + result = self.runner.invoke(graphql, ["--query", tmp.name]) + + assert result.exit_code == 0 + mock_client.execute_graphql.assert_called_once_with( + query=query_content, variables=None + ) + + # Clean up + Path(tmp.name).unlink() + + def test_graphql_variables_from_file(self): + """Test loading variables from JSON file.""" + variables = {"urn": "urn:li:corpuser:test"} + + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", 
delete=False) as tmp: + json.dump(variables, tmp) + tmp.flush() + + with patch("datahub.cli.graphql_cli.get_default_graph") as mock_get_graph: + mock_client = Mock() + mock_client.execute_graphql.return_value = { + "corpUser": {"info": {"email": "test@example.com"}} + } + mock_get_graph.return_value = mock_client + + result = self.runner.invoke( + graphql, + [ + "--query", + "query GetUser($urn: String!) { corpUser(urn: $urn) { info { email } } }", + "--variables", + tmp.name, + ], + ) + + assert result.exit_code == 0 + mock_client.execute_graphql.assert_called_once_with( + query="query GetUser($urn: String!) { corpUser(urn: $urn) { info { email } } }", + variables=variables, + ) + + # Clean up + Path(tmp.name).unlink() + + @patch("datahub.cli.graphql_cli.get_default_graph") + def test_graphql_query_from_relative_path(self, mock_get_graph): + """Test loading GraphQL query from relative path.""" + import os + import tempfile + + query_content = "query { me { username } }" + + mock_client = Mock() + mock_client.execute_graphql.return_value = {"me": {"username": "testuser"}} + mock_get_graph.return_value = mock_client + + with tempfile.TemporaryDirectory() as temp_dir: + # Create test file + test_file = Path(temp_dir) / "test_query.graphql" + test_file.write_text(query_content) + + # Create subdirectory structure for testing different relative paths + sub_dir = Path(temp_dir) / "subdir" + sub_dir.mkdir() + + original_cwd = os.getcwd() + try: + # Test from parent directory with ./ + os.chdir(temp_dir) + result = self.runner.invoke( + graphql, ["--query", "./test_query.graphql"] + ) + + assert result.exit_code == 0 + mock_client.execute_graphql.assert_called_with( + query=query_content, variables=None + ) + + # Reset mock for next test + mock_client.reset_mock() + + # Test from subdirectory with ../ + os.chdir(sub_dir) + result = self.runner.invoke( + graphql, ["--query", "../test_query.graphql"] + ) + + assert result.exit_code == 0 + mock_client.execute_graphql.assert_called_with( + query=query_content, variables=None + ) + + finally: + os.chdir(original_cwd) + + @patch("datahub.cli.graphql_cli.get_default_graph") + def test_graphql_variables_from_relative_path(self, mock_get_graph): + """Test loading variables from relative JSON file path.""" + import os + import tempfile + + variables = {"urn": "urn:li:corpuser:test"} + query = ( + "query GetUser($urn: String!) 
{ corpUser(urn: $urn) { info { email } } }" + ) + + mock_client = Mock() + mock_client.execute_graphql.return_value = { + "corpUser": {"info": {"email": "test@example.com"}} + } + mock_get_graph.return_value = mock_client + + with tempfile.TemporaryDirectory() as temp_dir: + # Create test variables file + vars_file = Path(temp_dir) / "variables.json" + vars_file.write_text(json.dumps(variables)) + + original_cwd = os.getcwd() + try: + os.chdir(temp_dir) + + result = self.runner.invoke( + graphql, + [ + "--query", + query, + "--variables", + "./variables.json", + ], + ) + + assert result.exit_code == 0 + mock_client.execute_graphql.assert_called_with( + query=query, variables=variables + ) + + finally: + os.chdir(original_cwd) + + @patch("datahub.cli.graphql_cli.get_default_graph") + def test_graphql_query_from_nonexistent_relative_path(self, mock_get_graph): + """Test error handling with non-existent relative path.""" + import os + import tempfile + + # Mock client to handle GraphQL execution + mock_client = Mock() + mock_client.execute_graphql.side_effect = Exception("Query execution failed") + mock_get_graph.return_value = mock_client + + with tempfile.TemporaryDirectory() as temp_dir: + original_cwd = os.getcwd() + try: + os.chdir(temp_dir) + + result = self.runner.invoke( + graphql, ["--query", "./nonexistent.graphql"] + ) + + # Should fail because file doesn't exist, but treated as literal query + assert result.exit_code != 0 + assert "Failed to execute GraphQL query" in result.output + + finally: + os.chdir(original_cwd) + + +class TestSchemaFileHandling: + """Test schema file parsing and fallback functionality.""" + + def test_parse_graphql_operations_from_files_error_fallback(self): + """Test that when schema path lookup fails, function falls back gracefully.""" + # Test the error handling by directly calling with None schema path + # which triggers the fallback path lookup that will fail in test environment + result = _parse_graphql_operations_from_files(None) + + # Should return the minimal fallback operations structure + assert "queryType" in result + assert "mutationType" in result + + # Should contain known fallback operations + query_fields = result["queryType"]["fields"] + query_names = [op["name"] for op in query_fields] + assert "me" in query_names + + def test_parse_graphql_operations_from_files_with_custom_path(self): + """Test parsing operations from custom schema path.""" + with tempfile.TemporaryDirectory() as temp_dir: + schema_path = Path(temp_dir) + + # Create a mock GraphQL schema file + schema_file = schema_path / "test.graphql" + schema_content = """ + type Query { + "Get current user" + me: User + "Search entities" + search(query: String!): SearchResults + } + + type Mutation { + "Create a new user" + createUser(input: CreateUserInput!): User + } + """ + schema_file.write_text(schema_content) + + result = _parse_graphql_operations_from_files(str(schema_path)) + + # Should parse queries + assert "queryType" in result + assert result["queryType"] is not None + query_fields = result["queryType"]["fields"] + assert len(query_fields) >= 2 + + # Check specific operations + me_op = next(op for op in query_fields if op["name"] == "me") + assert me_op["description"] == "Get current user" + + search_op = next(op for op in query_fields if op["name"] == "search") + assert search_op["description"] == "Search entities" + + # Should parse mutations + assert "mutationType" in result + assert result["mutationType"] is not None + mutation_fields = 
result["mutationType"]["fields"] + assert len(mutation_fields) >= 1 + + create_user_op = next( + op for op in mutation_fields if op["name"] == "createUser" + ) + assert create_user_op["description"] == "Create a new user" + + def test_parse_graphql_operations_from_files_nonexistent_custom_path(self): + """Test parsing operations with non-existent custom schema path.""" + # With our improved error handling, this should raise ClickException + nonexistent_path = "/this/path/definitely/does/not/exist/on/any/system" + + with pytest.raises(click.ClickException) as exc_info: + _parse_graphql_operations_from_files(nonexistent_path) + + assert "Custom schema path does not exist" in str(exc_info.value) + + def test_parse_graphql_operations_from_files_fallback_on_error(self): + """Test that parsing raises clear error when schema files can't be found.""" + with patch("datahub.cli.graphql_cli._get_schema_files_path") as mock_get_path: + mock_get_path.side_effect = Exception("Schema files not found") + + with pytest.raises(click.ClickException) as exc_info: + _parse_graphql_operations_from_files() + + assert "Schema loading failed" in str(exc_info.value) + + def test_parse_operations_from_content(self): + """Test parsing operations from GraphQL content string.""" + content = """ + \"\"\"Get current authenticated user\"\"\" + me: AuthenticatedUser + + "Search across all entity types" + searchAcrossEntities(input: SearchInput!): SearchResults + + # This should be skipped as it's not a valid field + type SomeType { + field: String + } + + "Browse entities hierarchically" + browse(path: BrowsePath): BrowseResults + """ + + operations = _parse_operations_from_content(content, "Query") + + assert len(operations) >= 3 + + # Check specific operations were parsed + op_names = [op["name"] for op in operations] + assert "me" in op_names + assert "searchAcrossEntities" in op_names + assert "browse" in op_names + + # Check descriptions were extracted + me_op = next(op for op in operations if op["name"] == "me") + assert "authenticated user" in me_op["description"].lower() + + search_op = next( + op for op in operations if op["name"] == "searchAcrossEntities" + ) + assert "search across all entity types" in search_op["description"].lower() + + def test_parse_operations_from_content_with_keywords(self): + """Test that GraphQL keywords are properly filtered out.""" + content = """ + query: String + mutation: String + subscription: String + type: String + input: String + enum: String + validField: String + """ + + operations = _parse_operations_from_content(content, "Query") + + # Should only contain validField, keywords should be filtered + assert len(operations) == 1 + assert operations[0]["name"] == "validField" + + +class TestOperationGenerationAndQueryBuilding: + """Test operation generation and query building functionality.""" + + def test_dict_to_graphql_input_simple(self): + """Test converting simple dict to GraphQL input syntax.""" + input_dict = {"key": "value", "number": 42, "flag": True} + result = _dict_to_graphql_input(input_dict) + + assert 'key: "value"' in result + assert "number: 42" in result + assert "flag: true" in result + assert result.startswith("{") and result.endswith("}") + + def test_dict_to_graphql_input_nested(self): + """Test converting nested dict to GraphQL input syntax.""" + input_dict = { + "user": {"name": "test", "age": 30}, + "tags": ["tag1", "tag2"], + "metadata": {"active": True}, + } + result = _dict_to_graphql_input(input_dict) + + assert 'user: {name: "test", age: 30}' in 
result + assert 'tags: ["tag1", "tag2"]' in result + assert "metadata: {active: true}" in result + + def test_dict_to_graphql_input_complex_lists(self): + """Test converting dict with complex list items to GraphQL input syntax.""" + input_dict = { + "users": [ + {"name": "user1", "active": True}, + {"name": "user2", "active": False}, + ], + "values": [1, 2, 3], + "strings": ["a", "b", "c"], + } + result = _dict_to_graphql_input(input_dict) + + assert ( + 'users: [{name: "user1", active: true}, {name: "user2", active: false}]' + in result + ) + assert "values: [1, 2, 3]" in result + assert 'strings: ["a", "b", "c"]' in result + + def test_dict_to_graphql_input_non_dict(self): + """Test handling non-dict input.""" + result = _dict_to_graphql_input("not a dict") # type: ignore + assert result == "not a dict" + + result = _dict_to_graphql_input(123) # type: ignore + assert result == "123" + + def test_generate_operation_query_simple(self): + """Test generating query for simple operation without arguments.""" + operation_field = {"name": "me", "description": "Get current user", "args": []} + + result = _generate_operation_query(operation_field, "Query") + expected = "query { me { corpUser { urn username properties { displayName email firstName lastName title } } } }" + assert result == expected + + def test_generate_operation_query_with_required_args(self): + """Test generating query for operation with required arguments.""" + operation_field = { + "name": "searchAcrossEntities", + "description": "Search across all entity types", + "args": [ + { + "name": "input", + "type": { + "kind": "NON_NULL", + "ofType": {"kind": "INPUT_OBJECT", "name": "SearchInput"}, + }, + } + ], + } + variables = {"input": {"query": "test", "start": 0, "count": 10}} + + result = _generate_operation_query(operation_field, "Query", variables) + expected = 'query { searchAcrossEntities(input: {query: "test", start: 0, count: 10}) }' + assert result == expected + + def test_generate_operation_query_missing_required_args(self): + """Test error when required arguments are missing.""" + operation_field = { + "name": "searchAcrossEntities", + "description": "Search across all entity types", + "args": [ + { + "name": "input", + "type": { + "kind": "NON_NULL", + "ofType": {"kind": "INPUT_OBJECT", "name": "SearchInput"}, + }, + } + ], + } + + with pytest.raises( + Exception, + match="Operation 'searchAcrossEntities' requires arguments: input", + ): + _generate_operation_query(operation_field, "Query", None) + + def test_generate_operation_query_with_optional_args(self): + """Test generating query with optional arguments.""" + operation_field = { + "name": "browse", + "description": "Browse entities", + "args": [ + { + "name": "path", + "type": {"kind": "SCALAR", "name": "String"}, # Optional + }, + { + "name": "filter", + "type": { + "kind": "INPUT_OBJECT", + "name": "BrowseFilter", + }, # Optional + }, + ], + } + variables = {"path": "datasets"} + + result = _generate_operation_query(operation_field, "Query", variables) + expected = 'query { browse(path: "datasets") }' + assert result == expected + + def test_generate_operation_query_mutation(self): + """Test generating mutation query.""" + operation_field = { + "name": "addTag", + "description": "Add tag to entity", + "args": [ + { + "name": "input", + "type": { + "kind": "NON_NULL", + "ofType": { + "kind": "INPUT_OBJECT", + "name": "TagAssociationInput", + }, + }, + } + ], + } + variables = { + "input": {"tagUrn": "urn:li:tag:test", "resourceUrn": "urn:li:dataset:test"} + } + 
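# _generate_operation_query is expected to inline these variables as GraphQL
+ # input literals in the mutation string, as the expected value below shows.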
+ result = _generate_operation_query(operation_field, "Mutation", variables) + expected = 'mutation { addTag(input: {tagUrn: "urn:li:tag:test", resourceUrn: "urn:li:dataset:test"}) }' + assert result == expected + + def test_generate_operation_query_list_operations(self): + """Test generating queries for list operations.""" + # Test listUsers operation + operation_field = { + "name": "listUsers", + "description": "List all users", + "args": [], + } + + result = _generate_operation_query(operation_field, "Query") + expected = "query { listUsers { total users { urn username properties { displayName email } } } }" + assert result == expected + + # Test other list operation + operation_field = { + "name": "listDatasets", + "description": "List datasets", + "args": [], + } + + result = _generate_operation_query(operation_field, "Query") + expected = "query { listDatasets { total } }" + assert result == expected + + def test_generate_operation_query_entity_operations(self): + """Test generating queries for specific entity operations.""" + entity_operations = [ + ("corpUser", "query { corpUser { urn } }"), + ("dataset", "query { dataset { urn } }"), + ("dashboard", "query { dashboard { urn } }"), + ("chart", "query { chart { urn } }"), + ] + + for op_name, expected in entity_operations: + operation_field = { + "name": op_name, + "description": f"Get {op_name}", + "args": [], + } + + result = _generate_operation_query(operation_field, "Query") + assert result == expected + + def test_generate_operation_query_complex_variables(self): + """Test generating queries with complex variable structures.""" + operation_field = { + "name": "complexOperation", + "description": "Complex operation with nested input", + "args": [ + { + "name": "input", + "type": { + "kind": "NON_NULL", + "ofType": {"kind": "INPUT_OBJECT", "name": "ComplexInput"}, + }, + } + ], + } + + complex_variables = { + "input": { + "filters": { + "platform": "snowflake", + "entityTypes": ["DATASET", "TABLE"], + }, + "sort": {"field": "name", "direction": "ASC"}, + "pagination": {"start": 0, "count": 20}, + } + } + + result = _generate_operation_query(operation_field, "Query", complex_variables) + + # Should contain the complex nested structure + assert "complexOperation(input: {" in result + assert 'platform: "snowflake"' in result + assert 'entityTypes: ["DATASET", "TABLE"]' in result + assert 'direction: "ASC"' in result + + def test_generate_operation_query_boolean_handling(self): + """Test that boolean values are properly formatted.""" + operation_field = { + "name": "testOperation", + "description": "Test operation with boolean", + "args": [ + {"name": "input", "type": {"kind": "INPUT_OBJECT", "name": "TestInput"}} + ], + } + + variables = { + "input": { + "active": True, + "deprecated": False, + "count": 0, # Should not be converted to boolean + } + } + + result = _generate_operation_query(operation_field, "Query", variables) + + assert "active: true" in result + assert "deprecated: false" in result + assert "count: 0" in result + + def test_generate_operation_query_string_escaping(self): + """Test that string values are properly quoted and escaped.""" + operation_field = { + "name": "testOperation", + "description": "Test operation with strings", + "args": [ + {"name": "input", "type": {"kind": "INPUT_OBJECT", "name": "TestInput"}} + ], + } + + variables = { + "input": { + "name": "test entity", + "description": 'A test description with "quotes"', + "number": 42, + } + } + + result = _generate_operation_query(operation_field, 
"Query", variables) + + assert 'name: "test entity"' in result + assert "number: 42" in result # Numbers should not be quoted + + +class TestTypeIntrospectionAndRecursiveExploration: + """Test type introspection and recursive type exploration functionality.""" + + def test_extract_base_type_name_simple(self): + """Test extracting base type name from simple type.""" + type_info = {"kind": "SCALAR", "name": "String"} + result = _extract_base_type_name(type_info) + assert result == "String" + + def test_extract_base_type_name_non_null(self): + """Test extracting base type name from NON_NULL wrapper.""" + type_info = {"kind": "NON_NULL", "ofType": {"kind": "SCALAR", "name": "String"}} + result = _extract_base_type_name(type_info) + assert result == "String" + + def test_extract_base_type_name_list(self): + """Test extracting base type name from LIST wrapper.""" + type_info = {"kind": "LIST", "ofType": {"kind": "SCALAR", "name": "String"}} + result = _extract_base_type_name(type_info) + assert result == "String" + + def test_extract_base_type_name_nested_wrappers(self): + """Test extracting base type name from nested wrappers.""" + type_info = { + "kind": "NON_NULL", + "ofType": { + "kind": "LIST", + "ofType": {"kind": "INPUT_OBJECT", "name": "SearchInput"}, + }, + } + result = _extract_base_type_name(type_info) + assert result == "SearchInput" + + def test_extract_base_type_name_empty(self): + """Test extracting base type name from empty or invalid type.""" + assert _extract_base_type_name({}) is None + assert _extract_base_type_name(None) is None # type: ignore + assert _extract_base_type_name({"kind": "NON_NULL"}) is None # Missing ofType + + def test_find_type_by_name(self): + """Test finding a type by name using GraphQL introspection.""" + mock_client = Mock() + mock_client.execute_graphql.return_value = { + "__type": { + "name": "SearchInput", + "kind": "INPUT_OBJECT", + "inputFields": [ + { + "name": "query", + "description": "Search query string", + "type": {"kind": "SCALAR", "name": "String"}, + }, + { + "name": "start", + "description": "Start offset", + "type": {"kind": "SCALAR", "name": "Int"}, + }, + ], + } + } + + result = _find_type_by_name(mock_client, "SearchInput") + + assert result is not None + assert result["name"] == "SearchInput" + assert result["kind"] == "INPUT_OBJECT" + assert len(result["inputFields"]) == 2 + + # Verify the query was executed correctly + mock_client.execute_graphql.assert_called_once() + call_args = mock_client.execute_graphql.call_args + query_arg = ( + call_args[1]["query"] + if len(call_args) > 1 and "query" in call_args[1] + else call_args[0][0] + ) + assert "SearchInput" in query_arg + + def test_find_type_by_name_not_found(self): + """Test finding a non-existent type.""" + mock_client = Mock() + mock_client.execute_graphql.return_value = {"__type": None} + + result = _find_type_by_name(mock_client, "NonExistentType") + assert result is None + + def test_find_type_by_name_error(self): + """Test error handling when introspection fails.""" + mock_client = Mock() + mock_client.execute_graphql.side_effect = Exception("GraphQL error") + + result = _find_type_by_name(mock_client, "SearchInput") + assert result is None + + def test_collect_nested_types(self): + """Test collecting nested type names from a type definition.""" + type_info = { + "inputFields": [ + { + "name": "filter", + "type": {"kind": "INPUT_OBJECT", "name": "FilterInput"}, + }, + { + "name": "tags", + "type": { + "kind": "LIST", + "ofType": {"kind": "INPUT_OBJECT", "name": "TagInput"}, 
+ }, + }, + { + "name": "name", + "type": { + "kind": "SCALAR", + "name": "String", + }, # Should be filtered out + }, + { + "name": "count", + "type": {"kind": "SCALAR", "name": "Int"}, # Should be filtered out + }, + ] + } + + result = _collect_nested_types(type_info) + + assert len(result) == 2 + assert "FilterInput" in result + assert "TagInput" in result + # Scalar types should not be included + assert "String" not in result + assert "Int" not in result + + def test_collect_nested_types_with_visited(self): + """Test collecting nested types with visited set to avoid duplicates.""" + type_info = { + "inputFields": [ + { + "name": "filter1", + "type": {"kind": "INPUT_OBJECT", "name": "FilterInput"}, + }, + { + "name": "filter2", + "type": { + "kind": "INPUT_OBJECT", + "name": "FilterInput", + }, # Duplicate + }, + ] + } + + visited: set[str] = set() + result = _collect_nested_types(type_info, visited) + + # The function doesn't deduplicate internally - it returns all found types + # Deduplication happens at a higher level in the recursive fetching + assert "FilterInput" in result + assert len(result) == 2 # Two references to the same type + + def test_fetch_type_recursive(self): + """Test recursively fetching a type and its nested types.""" + mock_client = Mock() + + # Mock responses for different types + def mock_execute_graphql(query, **kwargs): + if "SearchInput" in query: + return { + "__type": { + "name": "SearchInput", + "kind": "INPUT_OBJECT", + "inputFields": [ + { + "name": "filter", + "type": {"kind": "INPUT_OBJECT", "name": "FilterInput"}, + }, + { + "name": "query", + "type": {"kind": "SCALAR", "name": "String"}, + }, + ], + } + } + elif "FilterInput" in query: + return { + "__type": { + "name": "FilterInput", + "kind": "INPUT_OBJECT", + "inputFields": [ + { + "name": "platform", + "type": {"kind": "SCALAR", "name": "String"}, + } + ], + } + } + return {"__type": None} + + mock_client.execute_graphql.side_effect = mock_execute_graphql + + result = _fetch_type_recursive(mock_client, "SearchInput") + + # Should contain both types + assert "SearchInput" in result + assert "FilterInput" in result + + # Verify structure + search_input = result["SearchInput"] + assert search_input["name"] == "SearchInput" + assert search_input["kind"] == "INPUT_OBJECT" + + filter_input = result["FilterInput"] + assert filter_input["name"] == "FilterInput" + assert filter_input["kind"] == "INPUT_OBJECT" + + def test_fetch_type_recursive_circular_reference(self): + """Test handling of circular type references.""" + mock_client = Mock() + + # Create a circular reference scenario + def mock_execute_graphql(query, **kwargs): + if "TypeA" in query: + return { + "__type": { + "name": "TypeA", + "kind": "INPUT_OBJECT", + "inputFields": [ + { + "name": "typeB", + "type": {"kind": "INPUT_OBJECT", "name": "TypeB"}, + } + ], + } + } + elif "TypeB" in query: + return { + "__type": { + "name": "TypeB", + "kind": "INPUT_OBJECT", + "inputFields": [ + { + "name": "typeA", + "type": { + "kind": "INPUT_OBJECT", + "name": "TypeA", + }, # Circular reference + } + ], + } + } + return {"__type": None} + + mock_client.execute_graphql.side_effect = mock_execute_graphql + + result = _fetch_type_recursive(mock_client, "TypeA") + + # Should handle circular reference without infinite loop + assert "TypeA" in result + assert "TypeB" in result + assert len(result) == 2 # No duplicates + + def test_fetch_type_recursive_error_handling(self): + """Test error handling during recursive type fetching.""" + mock_client = Mock() + 
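# Simulate the GraphQL endpoint failing on every introspection call;
+ # _fetch_type_recursive should swallow the error and return an empty dict.
+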
mock_client.execute_graphql.side_effect = Exception("GraphQL error") + + result = _fetch_type_recursive(mock_client, "SearchInput") + + # Should return empty dict on error + assert result == {} + + def test_format_single_type_fields_input_object(self): + """Test formatting fields for an INPUT_OBJECT type.""" + type_info = { + "kind": "INPUT_OBJECT", + "inputFields": [ + { + "name": "query", + "description": "Search query string", + "type": {"kind": "SCALAR", "name": "String"}, + }, + { + "name": "filter", + "type": {"kind": "INPUT_OBJECT", "name": "FilterInput"}, + }, + ], + } + + result = _format_single_type_fields(type_info) + + assert len(result) == 2 + assert " query: String - Search query string" in result + assert " filter: FilterInput" in result + + def test_format_single_type_fields_enum(self): + """Test formatting enum values for an ENUM type.""" + type_info = { + "kind": "ENUM", + "enumValues": [ + { + "name": "ACTIVE", + "description": "Entity is active", + "isDeprecated": False, + }, + { + "name": "DEPRECATED_VALUE", + "description": "Old value", + "isDeprecated": True, + "deprecationReason": "Use ACTIVE instead", + }, + ], + } + + result = _format_single_type_fields(type_info) + + assert len(result) == 2 + assert " ACTIVE - Entity is active" in result + assert ( + " DEPRECATED_VALUE - Old value (DEPRECATED: Use ACTIVE instead)" in result + ) + + def test_format_single_type_fields_empty(self): + """Test formatting empty type (no fields or enum values).""" + # Empty INPUT_OBJECT + type_info = {"kind": "INPUT_OBJECT", "inputFields": []} + result = _format_single_type_fields(type_info) + assert result == [" (no fields)"] + + # Empty ENUM + type_info = {"kind": "ENUM", "enumValues": []} + result = _format_single_type_fields(type_info) + assert result == [" (no enum values)"] + + def test_format_recursive_types(self): + """Test formatting multiple types in hierarchical display.""" + types_map = { + "SearchInput": { + "name": "SearchInput", + "kind": "INPUT_OBJECT", + "inputFields": [ + {"name": "query", "type": {"kind": "SCALAR", "name": "String"}} + ], + }, + "FilterInput": { + "name": "FilterInput", + "kind": "INPUT_OBJECT", + "inputFields": [ + {"name": "platform", "type": {"kind": "SCALAR", "name": "String"}} + ], + }, + } + + result = _format_recursive_types(types_map, "SearchInput") + + # Should display root type first + lines = result.split("\n") + assert "SearchInput:" in lines[0] + assert " query: String" in result + + # Should display nested types + assert "FilterInput:" in result + assert " platform: String" in result + + def test_format_recursive_types_root_type_missing(self): + """Test formatting when root type is not in the types map.""" + types_map = { + "FilterInput": { + "name": "FilterInput", + "kind": "INPUT_OBJECT", + "inputFields": [], + } + } + + result = _format_recursive_types(types_map, "SearchInput") + + # Should still display other types + assert "FilterInput:" in result + # Should not crash when root type is missing + + +class TestJSONOutputFormatting: + """Test JSON output formatting for LLM consumption.""" + + def test_convert_type_to_json_simple(self): + """Test converting simple GraphQL type to JSON format.""" + type_info = {"kind": "SCALAR", "name": "String"} + result = _convert_type_to_json(type_info) + + expected = {"kind": "SCALAR", "name": "String"} + assert result == expected + + def test_convert_type_to_json_non_null(self): + """Test converting NON_NULL type to JSON format.""" + type_info = {"kind": "NON_NULL", "ofType": {"kind": "SCALAR", 
"name": "String"}} + result = _convert_type_to_json(type_info) + + expected = {"kind": "NON_NULL", "ofType": {"kind": "SCALAR", "name": "String"}} + assert result == expected + + def test_convert_type_to_json_list(self): + """Test converting LIST type to JSON format.""" + type_info = {"kind": "LIST", "ofType": {"kind": "SCALAR", "name": "String"}} + result = _convert_type_to_json(type_info) + + expected = {"kind": "LIST", "ofType": {"kind": "SCALAR", "name": "String"}} + assert result == expected + + def test_convert_type_to_json_complex(self): + """Test converting complex nested type to JSON format.""" + type_info = { + "kind": "NON_NULL", + "ofType": { + "kind": "LIST", + "ofType": {"kind": "INPUT_OBJECT", "name": "SearchInput"}, + }, + } + result = _convert_type_to_json(type_info) + + expected = { + "kind": "NON_NULL", + "ofType": { + "kind": "LIST", + "ofType": {"kind": "INPUT_OBJECT", "name": "SearchInput"}, + }, + } + assert result == expected + + def test_convert_type_to_json_empty(self): + """Test converting empty type info.""" + result = _convert_type_to_json({}) + assert result == {} + + result = _convert_type_to_json(None) # type: ignore + assert result == {} + + def test_convert_operation_to_json(self): + """Test converting operation info to JSON format.""" + operation = { + "name": "searchAcrossEntities", + "description": "Search across all entity types", + "args": [ + { + "name": "input", + "description": "Search input parameters", + "type": { + "kind": "NON_NULL", + "ofType": {"kind": "INPUT_OBJECT", "name": "SearchInput"}, + }, + }, + { + "name": "limit", + "description": "Maximum results to return", + "type": {"kind": "SCALAR", "name": "Int"}, # Optional + }, + ], + } + + result = _convert_operation_to_json(operation, "Query") + + assert result["name"] == "searchAcrossEntities" + assert result["type"] == "Query" + assert result["description"] == "Search across all entity types" + assert len(result["arguments"]) == 2 + + # Check required argument + input_arg = result["arguments"][0] + assert input_arg["name"] == "input" + assert input_arg["description"] == "Search input parameters" + assert input_arg["required"] is True + assert input_arg["type"]["kind"] == "NON_NULL" + + # Check optional argument + limit_arg = result["arguments"][1] + assert limit_arg["name"] == "limit" + assert limit_arg["required"] is False + assert limit_arg["type"]["kind"] == "SCALAR" + + def test_convert_operation_to_json_no_args(self): + """Test converting operation with no arguments to JSON format.""" + operation = {"name": "me", "description": "Get current user", "args": []} + + result = _convert_operation_to_json(operation, "Query") + + assert result["name"] == "me" + assert result["type"] == "Query" + assert result["description"] == "Get current user" + assert result["arguments"] == [] + + def test_convert_type_details_to_json_input_object(self): + """Test converting INPUT_OBJECT type details to JSON format.""" + type_info = { + "name": "SearchInput", + "kind": "INPUT_OBJECT", + "description": "Input for search operations", + "inputFields": [ + { + "name": "query", + "description": "Search query string", + "type": {"kind": "SCALAR", "name": "String"}, + }, + { + "name": "filter", + "description": "Search filters", + "type": {"kind": "INPUT_OBJECT", "name": "FilterInput"}, + }, + ], + } + + result = _convert_type_details_to_json(type_info) + + assert result["name"] == "SearchInput" + assert result["kind"] == "INPUT_OBJECT" + assert result["description"] == "Input for search operations" + assert 
len(result["fields"]) == 2 + + query_field = result["fields"][0] + assert query_field["name"] == "query" + assert query_field["description"] == "Search query string" + assert query_field["type"]["kind"] == "SCALAR" + + def test_convert_type_details_to_json_enum(self): + """Test converting ENUM type details to JSON format.""" + type_info = { + "name": "EntityType", + "kind": "ENUM", + "description": "Types of entities in DataHub", + "enumValues": [ + { + "name": "DATASET", + "description": "Dataset entity", + "isDeprecated": False, + }, + { + "name": "LEGACY_TYPE", + "description": "Old entity type", + "isDeprecated": True, + "deprecationReason": "Use DATASET instead", + }, + ], + } + + result = _convert_type_details_to_json(type_info) + + assert result["name"] == "EntityType" + assert result["kind"] == "ENUM" + assert result["description"] == "Types of entities in DataHub" + assert len(result["values"]) == 2 + + dataset_value = result["values"][0] + assert dataset_value["name"] == "DATASET" + assert dataset_value["description"] == "Dataset entity" + assert dataset_value["deprecated"] is False + + legacy_value = result["values"][1] + assert legacy_value["name"] == "LEGACY_TYPE" + assert legacy_value["deprecated"] is True + assert legacy_value["deprecationReason"] == "Use DATASET instead" + + def test_convert_operations_list_to_json(self): + """Test converting full operations list to JSON format.""" + schema = { + "queryType": { + "fields": [ + {"name": "me", "description": "Get current user", "args": []}, + { + "name": "search", + "description": "Search entities", + "args": [ + { + "name": "query", + "type": {"kind": "SCALAR", "name": "String"}, + } + ], + }, + ] + }, + "mutationType": { + "fields": [ + { + "name": "addTag", + "description": "Add tag to entity", + "args": [ + { + "name": "input", + "type": { + "kind": "NON_NULL", + "ofType": { + "kind": "INPUT_OBJECT", + "name": "TagInput", + }, + }, + } + ], + } + ] + }, + } + + result = _convert_operations_list_to_json(schema) + + assert "schema" in result + assert "queries" in result["schema"] + assert "mutations" in result["schema"] + + # Check queries + queries = result["schema"]["queries"] + assert len(queries) == 2 + assert queries[0]["name"] == "me" + assert queries[0]["type"] == "Query" + assert queries[1]["name"] == "search" + + # Check mutations + mutations = result["schema"]["mutations"] + assert len(mutations) == 1 + assert mutations[0]["name"] == "addTag" + assert mutations[0]["type"] == "Mutation" + + def test_convert_operations_list_to_json_empty_schema(self): + """Test converting empty schema to JSON format.""" + schema: dict[str, Any] = {} + result = _convert_operations_list_to_json(schema) + + assert result == {"schema": {"queries": [], "mutations": []}} + + def test_convert_describe_to_json_operation_only(self): + """Test converting describe output with operation only.""" + operation_info = ( + { + "name": "searchAcrossEntities", + "description": "Search across all entity types", + "args": [], + }, + "Query", + ) + + result = _convert_describe_to_json(operation_info, None, None) + + assert "operation" in result + assert result["operation"]["name"] == "searchAcrossEntities" + assert result["operation"]["type"] == "Query" + assert "type" not in result + assert "relatedTypes" not in result + + def test_convert_describe_to_json_type_only(self): + """Test converting describe output with type only.""" + type_info = { + "name": "SearchInput", + "kind": "INPUT_OBJECT", + "inputFields": [ + {"name": "query", "type": {"kind": 
"SCALAR", "name": "String"}} + ], + } + + result = _convert_describe_to_json(None, type_info, None) + + assert "type" in result + assert result["type"]["name"] == "SearchInput" + assert result["type"]["kind"] == "INPUT_OBJECT" + assert "operation" not in result + assert "relatedTypes" not in result + + def test_convert_describe_to_json_with_related_types(self): + """Test converting describe output with related types.""" + operation_info = ( + { + "name": "search", + "description": "Search operation", + "args": [ + { + "name": "input", + "type": {"kind": "INPUT_OBJECT", "name": "SearchInput"}, + } + ], + }, + "Query", + ) + + type_info = {"name": "SearchInput", "kind": "INPUT_OBJECT", "inputFields": []} + + related_types = { + "SearchInput": { + "name": "SearchInput", + "kind": "INPUT_OBJECT", + "inputFields": [], + }, + "FilterInput": { + "name": "FilterInput", + "kind": "INPUT_OBJECT", + "inputFields": [], + }, + } + + result = _convert_describe_to_json(operation_info, type_info, related_types) + + assert "operation" in result + assert "type" in result + assert "relatedTypes" in result + assert len(result["relatedTypes"]) == 2 + assert "SearchInput" in result["relatedTypes"] + assert "FilterInput" in result["relatedTypes"] + + def test_convert_describe_to_json_all_none(self): + """Test converting describe output when everything is None.""" + result = _convert_describe_to_json(None, None, None) + assert result == {} + + def test_json_formatting_preserves_structure(self): + """Test that JSON formatting preserves all necessary structure for LLMs.""" + # Complex operation with nested types + operation = { + "name": "complexSearch", + "description": "Complex search with multiple parameters", + "args": [ + { + "name": "input", + "description": "Search input", + "type": { + "kind": "NON_NULL", + "ofType": { + "kind": "INPUT_OBJECT", + "name": "ComplexSearchInput", + }, + }, + }, + { + "name": "options", + "description": "Search options", + "type": { + "kind": "LIST", + "ofType": {"kind": "ENUM", "name": "SearchOption"}, + }, + }, + ], + } + + result = _convert_operation_to_json(operation, "Query") + + # Verify complete structure is preserved + assert result["name"] == "complexSearch" + assert result["type"] == "Query" + assert result["description"] == "Complex search with multiple parameters" + assert len(result["arguments"]) == 2 + + # Verify nested type structure is preserved + input_arg = result["arguments"][0] + assert input_arg["required"] is True + assert input_arg["type"]["kind"] == "NON_NULL" + assert input_arg["type"]["ofType"]["kind"] == "INPUT_OBJECT" + assert input_arg["type"]["ofType"]["name"] == "ComplexSearchInput" + + options_arg = result["arguments"][1] + assert options_arg["required"] is False + assert options_arg["type"]["kind"] == "LIST" + assert options_arg["type"]["ofType"]["kind"] == "ENUM" + assert options_arg["type"]["ofType"]["name"] == "SearchOption" + + +class TestCoverageImprovementTargets: + """Test specific uncovered code paths to improve coverage.""" + + def test_parse_graphql_operations_file_not_found_error(self): + """Test error when schema files cannot be found.""" + with patch( + "datahub.cli.graphql_cli._get_schema_files_path", + side_effect=FileNotFoundError, + ): + with pytest.raises(click.ClickException) as exc_info: + _parse_graphql_operations_from_files("/nonexistent/path") + + assert "Schema loading failed" in str(exc_info.value) + + +class TestCLIArgumentValidationAndEdgeCases: + """Test CLI argument validation and edge case handling.""" + + def 
test_invalid_operation_name_handling(self): + """Test handling of invalid operation names.""" + # Test with empty operations list + result = _format_operation_list([], "query") + assert "No query operations found" in result + + def test_parse_variables_malformed_json(self): + """Test handling of malformed variable JSON.""" + # Test malformed JSON variables + from click.exceptions import ClickException + + with pytest.raises( + ClickException + ): # Should raise ClickException on JSON parse error + _parse_variables('{"malformed": json') + + def test_parse_variables_empty_input(self): + """Test handling of empty variable input.""" + result = _parse_variables("") + assert result is None + + result = _parse_variables(None) + assert result is None + + def test_output_format_validation_edge_cases(self): + """Test output format validation with edge cases.""" + # Test with empty operations list for mutations + result = _format_operation_list([], "mutation") + assert "No mutation operations found" in result + + +class TestComplexTypeResolutionScenarios: + """Test complex type resolution scenarios for better coverage.""" + + def test_deeply_nested_type_resolution(self): + """Test deeply nested type structures.""" + base_type = _extract_base_type_name( + { + "kind": "NON_NULL", + "ofType": { + "kind": "LIST", + "ofType": { + "kind": "NON_NULL", + "ofType": {"kind": "SCALAR", "name": "String"}, + }, + }, + } + ) + + assert base_type == "String" + + def test_unknown_type_kind_handling(self): + """Test handling of unknown type kinds.""" + # Test with unknown/unsupported type kind + result = _extract_base_type_name({"kind": "UNKNOWN_KIND", "name": "SomeType"}) + assert result == "SomeType" # Should fallback to name + + def test_type_conversion_edge_cases(self): + """Test type conversion edge cases for JSON output.""" + # Test type with missing optional fields + type_info = { + "kind": "OBJECT", + "name": "IncompleteType", + # Missing fields, description, etc. 
+ } + + result = _convert_type_to_json(type_info) + assert result["name"] == "IncompleteType" + assert result["kind"] == "OBJECT" + # Function only returns basic structure, not fields + + def test_fetch_type_recursive_with_visited_set(self): + """Test recursive type fetching with visited set.""" + schema = { + "types": [ + { + "name": "TestType", + "kind": "OBJECT", + "fields": [ + {"name": "field1", "type": {"kind": "SCALAR", "name": "String"}} + ], + } + ] + } + + visited: set[str] = set() + result = _fetch_type_recursive(schema, "TestType", visited) + + assert result is not None + + +class TestAdvancedJSONOutputFormatting: + """Test advanced JSON output formatting edge cases.""" + + def test_convert_describe_to_json_with_none_inputs(self): + """Test describe functionality with None inputs.""" + # This should handle the case where inputs are None gracefully + result = _convert_describe_to_json(None, None) + # Should return something (empty dict or error info) + assert isinstance(result, dict) + + def test_operation_list_conversion_with_empty_schema(self): + """Test operation list conversion with minimal schema.""" + schema: dict[str, Any] = { + "queryType": {"fields": []}, + "mutationType": {"fields": []}, + } + + result = _convert_operations_list_to_json(schema) + assert isinstance(result, dict) + assert "schema" in result + + def test_json_output_with_special_characters(self): + """Test JSON output handling of special characters.""" + operation = { + "name": "test_with_ñéw_chars", + "description": "Test with special chars: <>&\"'", + "args": [], + } + + result = _convert_operation_to_json(operation, "Query") + assert result["name"] == "test_with_ñéw_chars" + assert "<>&" in result["description"] + + def test_dict_to_graphql_input_edge_cases(self): + """Test dictionary to GraphQL input conversion with edge cases.""" + # Test empty dict + result = _dict_to_graphql_input({}) + assert result == "{}" + + # Test nested structure + nested = {"outer": {"inner": "value", "number": 42}} + result = _dict_to_graphql_input(nested) + assert "outer" in result + assert "inner" in result + + +class TestMainCLIFunction: + """Test the main graphql() CLI function to improve coverage of CLI entry points.""" + + def test_graphql_list_operations_mode(self): + """Test --list-operations CLI mode.""" + runner = CliRunner() + + with ( + patch("datahub.cli.graphql_cli.get_default_graph"), + patch( + "datahub.cli.graphql_cli._get_schema_via_introspection" + ) as mock_schema, + patch("datahub.cli.graphql_cli._handle_list_operations") as mock_handler, + ): + mock_schema.return_value = {"test": "schema"} + + # Call CLI command with --list-operations + result = runner.invoke(graphql, ["--list-operations", "--format", "human"]) + + # Should execute successfully + assert result.exit_code == 0 + # Verify the correct handler was called + mock_handler.assert_called_once() + + def test_graphql_list_queries_mode(self): + """Test --list-queries CLI mode.""" + runner = CliRunner() + + with ( + patch("datahub.cli.graphql_cli.get_default_graph"), + patch( + "datahub.cli.graphql_cli._get_schema_via_introspection" + ) as mock_schema, + patch("datahub.cli.graphql_cli._handle_list_queries") as mock_handler, + ): + mock_schema.return_value = {"test": "schema"} + + # Call CLI command with --list-queries + result = runner.invoke(graphql, ["--list-queries", "--format", "json"]) + + # Should execute successfully + assert result.exit_code == 0 + # Verify the correct handler was called + mock_handler.assert_called_once() + + def 
test_graphql_list_mutations_mode(self): + """Test --list-mutations CLI mode.""" + runner = CliRunner() + + with ( + patch("datahub.cli.graphql_cli.get_default_graph"), + patch( + "datahub.cli.graphql_cli._get_schema_via_introspection" + ) as mock_schema, + patch("datahub.cli.graphql_cli._handle_list_mutations") as mock_handler, + ): + mock_schema.return_value = {"test": "schema"} + + # Call CLI command with --list-mutations and --no-pretty + result = runner.invoke( + graphql, ["--list-mutations", "--no-pretty", "--format", "human"] + ) + + # Should execute successfully + assert result.exit_code == 0 + # Verify the correct handler was called + mock_handler.assert_called_once() + + def test_graphql_describe_mode(self): + """Test --describe CLI mode with schema introspection.""" + runner = CliRunner() + + with ( + patch("datahub.cli.graphql_cli.get_default_graph"), + patch( + "datahub.cli.graphql_cli._get_schema_via_introspection" + ) as mock_schema, + patch("datahub.cli.graphql_cli._handle_describe") as mock_handler, + ): + mock_schema.return_value = {"test": "schema"} + + # Call CLI command with --describe and --recurse (no schema path) + result = runner.invoke( + graphql, + [ + "--describe", + "TestType", + "--recurse", + "--format", + "json", + ], + ) + + # Should execute successfully + assert result.exit_code == 0 + # Verify the correct handler was called + mock_handler.assert_called_once() + + def test_graphql_query_execution_mode(self): + """Test query execution mode.""" + runner = CliRunner() + + with ( + patch("datahub.cli.graphql_cli.get_default_graph"), + patch("datahub.cli.graphql_cli._execute_query") as mock_execute, + ): + mock_execute.return_value = {"data": {"test": "result"}} + + # Call CLI command with --query + result = runner.invoke( + graphql, ["--query", "{ __typename }", "--format", "json"] + ) + + # Should execute successfully + assert result.exit_code == 0 + # Verify query execution was called + mock_execute.assert_called_once() + + def test_graphql_operation_execution_mode(self): + """Test operation execution mode.""" + runner = CliRunner() + + with ( + patch("datahub.cli.graphql_cli.get_default_graph"), + patch("datahub.cli.graphql_cli._execute_operation") as mock_execute, + ): + mock_execute.return_value = {"data": {"test": "result"}} + + # Call CLI command with --operation and --variables + result = runner.invoke( + graphql, + [ + "--operation", + "testOperation", + "--variables", + '{"var": "value"}', + "--format", + "json", + ], + ) + + # Should execute successfully + assert result.exit_code == 0 + # Verify operation execution was called + mock_execute.assert_called_once() + + def test_graphql_both_queries_and_mutations_list(self): + """Test listing both queries and mutations together.""" + runner = CliRunner() + + with ( + patch("datahub.cli.graphql_cli.get_default_graph"), + patch( + "datahub.cli.graphql_cli._get_schema_via_introspection" + ) as mock_schema, + patch("datahub.cli.graphql_cli._handle_list_operations") as mock_handler, + ): + mock_schema.return_value = {"test": "schema"} + + # Call CLI command with both --list-queries and --list-mutations + result = runner.invoke( + graphql, ["--list-queries", "--list-mutations", "--format", "human"] + ) + + # Should execute successfully + assert result.exit_code == 0 + # Should call list_operations handler when both are true + mock_handler.assert_called_once() + + def test_graphql_with_custom_schema_path(self): + """Test CLI with custom schema path.""" + runner = CliRunner() + + with ( + 
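+            # Stub the graph client, the file-based schema parser, and the
+            # list-operations handler so the --schema-path flow can be
+            # exercised without a live DataHub server.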
patch("datahub.cli.graphql_cli.get_default_graph"), + patch( + "datahub.cli.graphql_cli._parse_graphql_operations_from_files" + ) as mock_parse, + patch("datahub.cli.graphql_cli._handle_list_operations"), + ): + mock_parse.return_value = {"test": "schema"} + + # Call CLI command with custom --schema-path + result = runner.invoke( + graphql, + [ + "--list-operations", + "--schema-path", + "/custom/schema/path", + "--format", + "json", + ], + ) + + # Should execute successfully + assert result.exit_code == 0 + # Verify schema was loaded with custom path + mock_parse.assert_called_once_with("/custom/schema/path") + + +class TestCLIFilePathHandling: + """Test CLI file path handling and schema discovery to improve coverage.""" + + def test_graphql_query_with_file_path(self): + """Test CLI with query from file path.""" + runner = CliRunner() + + with tempfile.NamedTemporaryFile( + mode="w", suffix=".graphql", delete=False + ) as f: + f.write("{ __typename }") + temp_path = f.name + + try: + with ( + patch("datahub.cli.graphql_cli.get_default_graph"), + patch("datahub.cli.graphql_cli._execute_query") as mock_execute, + ): + mock_execute.return_value = {"data": {"test": "result"}} + + # Call CLI command with file path as query + result = runner.invoke( + graphql, ["--query", temp_path, "--format", "json"] + ) + + # Should execute successfully + assert result.exit_code == 0 + # Should execute the query loaded from file + mock_execute.assert_called_once() + + finally: + os.unlink(temp_path) + + def test_graphql_variables_with_file_path(self): + """Test CLI with variables from file path.""" + runner = CliRunner() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: + json.dump({"testVar": "testValue"}, f) + temp_path = f.name + + try: + with ( + patch("datahub.cli.graphql_cli.get_default_graph"), + patch("datahub.cli.graphql_cli._execute_query") as mock_execute, + ): + mock_execute.return_value = {"data": {"test": "result"}} + + # Call CLI command with file path as variables + result = runner.invoke( + graphql, + [ + "--query", + "{ __typename }", + "--variables", + temp_path, + "--format", + "json", + ], + ) + + # Should execute successfully + assert result.exit_code == 0 + # Should execute with variables loaded from file + mock_execute.assert_called_once() + + finally: + os.unlink(temp_path) + + +class TestCLIOutputFormatting: + """Test CLI output formatting and pretty-printing to improve coverage.""" + + def test_json_output_format(self): + """Test JSON output format handling.""" + runner = CliRunner() + + with ( + patch("datahub.cli.graphql_cli.get_default_graph"), + patch("datahub.cli.graphql_cli._execute_query") as mock_execute, + ): + mock_execute.return_value = {"data": {"test": "result"}} + + # Call CLI command with JSON format + result = runner.invoke( + graphql, ["--query", "{ __typename }", "--format", "json"] + ) + + # Should execute successfully + assert result.exit_code == 0 + # Should have some output + assert len(result.output) > 0 + + def test_human_output_format(self): + """Test human-readable output format handling.""" + runner = CliRunner() + + with ( + patch("datahub.cli.graphql_cli.get_default_graph"), + patch("datahub.cli.graphql_cli._execute_query") as mock_execute, + ): + mock_execute.return_value = {"data": {"test": "result"}} + + # Call CLI command with human format + result = runner.invoke( + graphql, ["--query", "{ __typename }", "--format", "human"] + ) + + # Should execute successfully + assert result.exit_code == 0 + # Should have some 
output + assert len(result.output) > 0 + + def test_no_pretty_flag_handling(self): + """Test --no-pretty flag processing.""" + runner = CliRunner() + + with ( + patch("datahub.cli.graphql_cli.get_default_graph"), + patch( + "datahub.cli.graphql_cli._get_schema_via_introspection" + ) as mock_schema, + patch("datahub.cli.graphql_cli._handle_list_operations") as mock_handler, + ): + mock_schema.return_value = {"test": "schema"} + + # Call CLI command with --no-pretty flag + result = runner.invoke( + graphql, ["--list-operations", "--no-pretty", "--format", "json"] + ) + + # Should execute successfully + assert result.exit_code == 0 + # Handler should be called + mock_handler.assert_called_once() diff --git a/smoke-test/tests/cli/graphql_cmd/__init__.py b/smoke-test/tests/cli/graphql_cmd/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/smoke-test/tests/cli/graphql_cmd/sample_queries.graphql b/smoke-test/tests/cli/graphql_cmd/sample_queries.graphql new file mode 100644 index 0000000000..9d197a742c --- /dev/null +++ b/smoke-test/tests/cli/graphql_cmd/sample_queries.graphql @@ -0,0 +1,39 @@ +# Sample GraphQL queries for smoke testing + +# Simple introspection query +query IntrospectionQuery { + __schema { + queryType { + name + } + } +} + +# Basic entity search +query SearchEntities { + search(input: { + type: DATASET + query: "*" + start: 0 + count: 5 + }) { + start + count + total + searchResults { + entity { + urn + type + } + } + } +} + +# List available platforms +query ListPlatforms { + listPlatforms { + name + displayName + type + } +} \ No newline at end of file diff --git a/smoke-test/tests/cli/graphql_cmd/sample_queries.json b/smoke-test/tests/cli/graphql_cmd/sample_queries.json new file mode 100644 index 0000000000..780c999078 --- /dev/null +++ b/smoke-test/tests/cli/graphql_cmd/sample_queries.json @@ -0,0 +1,4 @@ +{ + "query": "{ __schema { queryType { name } } }", + "variables": {} +} \ No newline at end of file diff --git a/smoke-test/tests/cli/graphql_cmd/test_graphql_cli_smoke.py b/smoke-test/tests/cli/graphql_cmd/test_graphql_cli_smoke.py new file mode 100644 index 0000000000..879f50f6e5 --- /dev/null +++ b/smoke-test/tests/cli/graphql_cmd/test_graphql_cli_smoke.py @@ -0,0 +1,335 @@ +""" +Smoke tests for DataHub GraphQL CLI functionality. + +These tests validate the core GraphQL CLI features including: +- Schema discovery and introspection +- File-based query execution with relative paths +- JSON output formatting +- CLI integration with DataHub instances +""" + +import json +import os +import tempfile +from pathlib import Path +from typing import Optional + +import pytest +import requests + +from tests.utils import run_datahub_cmd, wait_for_healthcheck_util + + +class TestGraphQLCLIStandalone: + """Fast standalone tests that don't require full DataHub functionality.""" + + def setup_method(self): + """Set up test environment variables.""" + self.original_env = os.environ.copy() + # Ensure we have the required DataHub connection info + os.environ.setdefault("DATAHUB_GMS_HOST", "http://localhost:8080") + os.environ.setdefault("DATAHUB_GMS_TOKEN", "") + + def teardown_method(self): + """Restore original environment.""" + os.environ.clear() + os.environ.update(self.original_env) + + def _run_datahub_cli( + self, args: list[str], input_data: Optional[str] = None + ) -> tuple[int, str, str]: + """ + Run datahub CLI command and return (exit_code, stdout, stderr). 
+ + Args: + args: CLI arguments (e.g., ['graphql', '--list-operations']) + input_data: Optional stdin input + + Returns: + Tuple of (exit_code, stdout, stderr) + """ + result = run_datahub_cmd(args, input=input_data) + return result.exit_code, result.stdout, result.stderr + + def test_graphql_help(self): + """Test that GraphQL CLI help is accessible.""" + exit_code, stdout, stderr = self._run_datahub_cli(["graphql", "--help"]) + + assert exit_code == 0, f"CLI help failed with stderr: {stderr}" + assert "GraphQL" in stdout or "graphql" in stdout + assert "--list-operations" in stdout or "--schema-path" in stdout + assert "--query" in stdout + + def test_graphql_schema_discovery(self): + """Test GraphQL schema discovery functionality.""" + # This should work even without authentication for schema discovery + exit_code, stdout, stderr = self._run_datahub_cli( + ["graphql", "--list-operations"] + ) + + # Command may exit with error (no DataHub connection) but should not crash + assert exit_code in [0, 1], f"Unexpected exit code. stderr: {stderr}" + assert "Traceback" not in stderr # No Python crashes + # The shell must not report the graphql subcommand itself as missing + assert not ( + "graphql" in stderr.lower() and "command not found" in stderr.lower() + ) + + def test_graphql_file_path_handling(self): + """Test that GraphQL CLI properly handles file path arguments.""" + with tempfile.NamedTemporaryFile( + mode="w", suffix=".graphql", delete=False + ) as f: + f.write("{ __typename }") + temp_path = f.name + + try: + # Test with absolute path + exit_code, stdout, stderr = self._run_datahub_cli( + ["graphql", "--query", temp_path, "--format", "json"] + ) + + # Should recognize as file path (may fail due to missing DataHub connection) + assert exit_code in [0, 1], ( + f"Unexpected exit code with file path. stderr: {stderr}" + ) + assert "No such file or directory" not in stderr + assert "FileNotFoundError" not in stderr + + finally: + os.unlink(temp_path) + + def test_graphql_relative_path_handling(self): + """Test that GraphQL CLI handles relative paths correctly.""" + # Create a temporary GraphQL file in a subdirectory + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + query_file = temp_path / "test.graphql" + query_file.write_text("{ __typename }") + + # Change to parent directory and use relative path + original_cwd = os.getcwd() + try: + os.chdir(temp_path.parent) + relative_path = os.path.relpath(str(query_file)) + + exit_code, stdout, stderr = self._run_datahub_cli( + ["graphql", "--query", relative_path, "--format", "json"] + ) + + # Should recognize relative path (may fail due to missing DataHub connection) + assert exit_code in [0, 1], ( + f"Relative path handling failed. 
stderr: {stderr}" + ) + assert "No such file or directory" not in stderr + assert "FileNotFoundError" not in stderr + + finally: + os.chdir(original_cwd) + + +class TestGraphQLCLIIntegration: + """Integration tests requiring full DataHub functionality.""" + + @pytest.fixture(autouse=True) + def setup_datahub(self, auth_session): + """Ensure DataHub is running and accessible.""" + self.auth_session = auth_session + wait_for_healthcheck_util(requests) + + def _run_authenticated_graphql(self, args: list[str]) -> tuple[int, str, str]: + """Run GraphQL CLI with proper authentication.""" + result = run_datahub_cmd( + args, + env={ + "DATAHUB_GMS_URL": self.auth_session.gms_url(), + "DATAHUB_GMS_TOKEN": self.auth_session.gms_token(), + }, + ) + return result.exit_code, result.stdout, result.stderr + + def test_graphql_schema_introspection(self): + """Test GraphQL schema introspection with authentication.""" + exit_code, stdout, stderr = self._run_authenticated_graphql( + ["graphql", "--list-operations", "--format", "json"] + ) + + assert exit_code == 0, f"Schema introspection failed: {stderr}" + + # Should produce some output showing operations + if stdout.strip(): + # Either JSON format or human-readable format is acceptable + if stdout.strip().startswith("{") or stdout.strip().startswith("["): + try: + schema_data = json.loads(stdout) + assert isinstance(schema_data, (dict, list)) + except json.JSONDecodeError as e: + pytest.fail(f"Schema output is not valid JSON: {e}") + else: + # Human readable format + assert len(stdout.strip()) > 0 + + def test_graphql_simple_query_execution(self): + """Test execution of a simple GraphQL query.""" + simple_query = "{ __typename }" + + exit_code, stdout, stderr = self._run_authenticated_graphql( + ["graphql", "--query", simple_query, "--format", "json"] + ) + + assert exit_code == 0, f"Simple query execution failed: {stderr}" + + # Parse JSON output + try: + result = json.loads(stdout) + assert isinstance(result, dict) + # Should contain query response - either data field or direct response + assert "data" in result or "__typename" in result or len(result) > 0 + except json.JSONDecodeError as e: + pytest.fail(f"Query result is not valid JSON: {e}") + + def test_graphql_query_from_file(self): + """Test GraphQL query execution from file with relative path.""" + query_content = """{ + __typename +}""" + + with tempfile.NamedTemporaryFile( + mode="w", suffix=".graphql", delete=False + ) as f: + f.write(query_content) + temp_path = f.name + + try: + # Test with absolute path + exit_code, stdout, stderr = self._run_authenticated_graphql( + ["graphql", "--query", temp_path, "--format", "json"] + ) + + assert exit_code == 0, f"File query execution failed: {stderr}" + + # Parse JSON output + try: + result = json.loads(stdout) + assert isinstance(result, dict) + assert "data" in result or "__typename" in result or len(result) > 0 + except json.JSONDecodeError as e: + pytest.fail(f"File query result is not valid JSON: {e}") + + finally: + os.unlink(temp_path) + + def test_graphql_list_operations(self): + """Test GraphQL CLI list operations functionality.""" + for operation_type in ["queries", "mutations"]: + exit_code, stdout, stderr = self._run_authenticated_graphql( + ["graphql", f"--list-{operation_type}", "--format", "json"] + ) + + assert exit_code == 0, f"List {operation_type} failed: {stderr}" + + # Should produce some output (might be empty list) + if stdout.strip(): + try: + result = json.loads(stdout) + assert isinstance(result, (list, dict)) + except 
json.JSONDecodeError: + # Some operations might produce non-JSON output, which is also acceptable + assert len(stdout.strip()) > 0 + + def test_graphql_json_output_format(self): + """Test that JSON output format is properly structured.""" + exit_code, stdout, stderr = self._run_authenticated_graphql( + ["graphql", "--query", "{ __typename }", "--format", "json"] + ) + + assert exit_code == 0, f"JSON output test failed: {stderr}" + + # Verify JSON structure + try: + result = json.loads(stdout) + assert isinstance(result, dict) + + # Should follow GraphQL response format or be direct data + if "data" in result: + assert isinstance(result["data"], (dict, type(None))) + elif "__typename" in result: + # Direct response format + assert isinstance(result, dict) + if "errors" in result: + assert isinstance(result["errors"], list) + + except json.JSONDecodeError as e: + pytest.fail(f"JSON output is malformed: {e}") + + def test_graphql_error_handling(self): + """Test GraphQL CLI error handling with invalid queries.""" + invalid_query = "{ invalidField { doesNotExist } }" + + exit_code, stdout, stderr = self._run_authenticated_graphql( + ["graphql", "--query", invalid_query, "--format", "json"] + ) + + # Should handle errors gracefully + assert exit_code in [0, 1], ( + f"Error handling failed with unexpected exit code. stderr: {stderr}" + ) + + if exit_code == 0: + # If successful, should contain error information in GraphQL response + try: + result = json.loads(stdout) + # GraphQL errors should be in the response + assert "errors" in result or "data" in result + except json.JSONDecodeError: + pytest.fail("Invalid query should produce structured error response") + else: + # If exit code 1, should have meaningful error message + assert len(stderr.strip()) > 0, "Should provide error message on failure" + + +class TestGraphQLCLIFileHandling: + """Specific tests for file handling improvements.""" + + def test_json_file_detection(self): + """Test that CLI properly detects and handles JSON files.""" + json_content = {"query": "{ __typename }"} + + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: + json.dump(json_content, f) + temp_path = f.name + + try: + # Should recognize .json extension + result = run_datahub_cmd(["graphql", "--query", temp_path]) + stderr = result.stderr + + # Should not fail due to file detection issues + assert "No such file or directory" not in stderr + assert "FileNotFoundError" not in stderr + + finally: + os.unlink(temp_path) + + def test_relative_path_resolution(self): + """Test basic relative path resolution scenarios.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Test simple relative path scenario + query_file = temp_path / "query.graphql" + query_file.write_text("{ __typename }") + + original_cwd = os.getcwd() + try: + os.chdir(temp_dir) + test_path = "./query.graphql" + + result = run_datahub_cmd(["graphql", "--query", test_path]) + stderr = result.stderr + + # File should be found and recognized + assert "No such file or directory" not in stderr + assert "FileNotFoundError" not in stderr + + finally: + os.chdir(original_cwd)
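+
+    # --- Illustrative sketch, not part of the verified suite ---
+    # The same pattern should extend to parent-directory references. The test
+    # name and the nested-directory layout below are hypothetical; only the
+    # helpers already used above (run_datahub_cmd, tempfile, Path) are assumed.
+    def test_parent_relative_path_resolution(self):
+        """Sketch: resolve a ../-style relative path to a query file."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            nested = Path(temp_dir) / "nested"
+            nested.mkdir()
+            query_file = Path(temp_dir) / "query.graphql"
+            query_file.write_text("{ __typename }")
+
+            original_cwd = os.getcwd()
+            try:
+                # From inside nested/, the file is reachable via "../query.graphql"
+                os.chdir(nested)
+                result = run_datahub_cmd(["graphql", "--query", "../query.graphql"])
+
+                # File resolution should succeed even if the server call fails
+                assert "No such file or directory" not in result.stderr
+                assert "FileNotFoundError" not in result.stderr
+            finally:
+                os.chdir(original_cwd)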