mirror of
https://github.com/datahub-project/datahub.git
synced 2025-10-26 16:34:44 +00:00
feat(cli): graphql command (#14781)
This commit is contained in:
parent
e25a82abe3
commit
f9913cd944
502
docs/cli-commands/graphql.md
Normal file
502
docs/cli-commands/graphql.md
Normal file
@ -0,0 +1,502 @@
|
||||
# DataHub GraphQL CLI
|
||||
|
||||
The `datahub graphql` command provides a powerful interface to interact with DataHub's GraphQL API directly from the command line. This enables you to query metadata, perform mutations, and explore the GraphQL schema without writing custom applications.
|
||||
|
||||
## Quick Start
|
||||
|
||||
```shell
|
||||
# Get current user info
|
||||
datahub graphql --operation me
|
||||
|
||||
# Search for datasets
|
||||
datahub graphql --operation searchAcrossEntities --variables '{"input": {"query": "users", "types": ["DATASET"]}}'
|
||||
|
||||
# Execute raw GraphQL
|
||||
datahub graphql --query "query { me { username } }"
|
||||
```
|
||||
|
||||
## Core Features
|
||||
|
||||
### 1. Schema Discovery
|
||||
|
||||
Discover available operations and understand their structure:
|
||||
|
||||
```shell
|
||||
# List all available operations
|
||||
datahub graphql --list-operations
|
||||
|
||||
# List only queries or mutations
|
||||
datahub graphql --list-queries
|
||||
datahub graphql --list-mutations
|
||||
```
|
||||
|
||||
### 2. Smart Description
|
||||
|
||||
The `--describe` option looks up the given name among both operations and types:
|
||||
|
||||
```shell
|
||||
# Describe an operation
|
||||
datahub graphql --describe searchAcrossEntities
|
||||
|
||||
# Describe a GraphQL type
|
||||
datahub graphql --describe SearchInput
|
||||
|
||||
# Describe enum types to see allowed values
|
||||
datahub graphql --describe FilterOperator
|
||||
```
|
||||
|
||||
**When both an operation and a type exist with the same name:**
|
||||
|
||||
```shell
|
||||
datahub graphql --describe someConflictingName
|
||||
# Output:
|
||||
# === OPERATION ===
|
||||
# Operation: someConflictingName
|
||||
# Type: Query
|
||||
# ...
|
||||
#
|
||||
# === TYPE ===
|
||||
# Type: someConflictingName
|
||||
# Kind: INPUT_OBJECT
|
||||
# ...
|
||||
```
|
||||
|
||||
### 3. Recursive Type Exploration
|
||||
|
||||
Use `--recurse` with `--describe` to explore all nested types:
|
||||
|
||||
```shell
|
||||
# Explore operation with all its input types
|
||||
datahub graphql --describe searchAcrossEntities --recurse
|
||||
|
||||
# Explore type with all nested dependencies
|
||||
datahub graphql --describe SearchInput --recurse
|
||||
```
|
||||
|
||||
**Example recursive output:**
|
||||
|
||||
```
|
||||
Operation: searchAcrossEntities
|
||||
Type: Query
|
||||
Description: Search across all entity types
|
||||
Arguments:
|
||||
- input: SearchInput!
|
||||
|
||||
Input Type Details:
|
||||
|
||||
SearchInput:
|
||||
query: String
|
||||
types: [EntityType!]
|
||||
filters: SearchFilter
|
||||
|
||||
SearchFilter:
|
||||
criteria: [FacetFilterInput!]
|
||||
|
||||
FacetFilterInput:
|
||||
field: String! - Name of field to filter by
|
||||
values: [String!]! - Values, one of which the intended field should match
|
||||
condition: FilterOperator - Condition for the values
|
||||
|
||||
FilterOperator:
|
||||
EQUAL - Represents the relation: field = value
|
||||
GREATER_THAN - Represents the relation: field > value
|
||||
LESS_THAN - Represents the relation: field < value
|
||||
```
|
||||
|
||||
### 4. Operation Execution
|
||||
|
||||
Execute operations by name without writing full GraphQL:
|
||||
|
||||
```shell
|
||||
# Execute operation by name
|
||||
datahub graphql --operation me
|
||||
|
||||
# Execute with variables
|
||||
datahub graphql --operation searchAcrossEntities --variables '{"input": {"query": "datasets", "types": ["DATASET"]}}'
|
||||
|
||||
# Execute with variables from file
|
||||
datahub graphql --operation createGroup --variables ./group-data.json
|
||||
```
|
||||
|
||||
### 5. Raw GraphQL Execution
|
||||
|
||||
Execute any custom GraphQL query or mutation:
|
||||
|
||||
```shell
|
||||
# Simple query
|
||||
datahub graphql --query "query { me { username } }"
|
||||
|
||||
# Query with variables
|
||||
datahub graphql --query 'query GetUser($urn: String!) { corpUser(urn: $urn) { info { email } } }' --variables '{"urn": "urn:li:corpuser:john"}'
|
||||
|
||||
# Query from file
|
||||
datahub graphql --query ./complex-query.graphql --variables ./variables.json
|
||||
|
||||
# Mutation
|
||||
datahub graphql --query "mutation { addTag(input: {resourceUrn: \"urn:li:dataset:...\", tagUrn: \"urn:li:tag:Important\"}) }"
|
||||
```
|
||||
|
||||
### 6. File Support
|
||||
|
||||
Both queries and variables can be loaded from files:
|
||||
|
||||
```shell
|
||||
# Load query from file
|
||||
datahub graphql --query ./queries/search-datasets.graphql
|
||||
|
||||
# Load variables from file
|
||||
datahub graphql --operation searchAcrossEntities --variables ./variables/search-params.json
|
||||
|
||||
# Both from files
|
||||
datahub graphql --query ./query.graphql --variables ./vars.json
|
||||
```
|
||||
|
||||
### 7. LLM-Friendly JSON Output
|
||||
|
||||
Use `--format json` to get structured JSON output perfect for LLM consumption:
|
||||
|
||||
```shell
|
||||
# Get operations as JSON for LLM processing
|
||||
datahub graphql --list-operations --format json
|
||||
|
||||
# Describe operation with complete type information
|
||||
datahub graphql --describe searchAcrossEntities --recurse --format json
|
||||
|
||||
# Get type details in structured format
|
||||
datahub graphql --describe SearchInput --format json
|
||||
```
|
||||
|
||||
**Example JSON output for `--list-operations --format json`:**
|
||||
|
||||
```json
|
||||
{
|
||||
"schema": {
|
||||
"queries": [
|
||||
{
|
||||
"name": "me",
|
||||
"type": "Query",
|
||||
"description": "Get current user information",
|
||||
"arguments": []
|
||||
},
|
||||
{
|
||||
"name": "searchAcrossEntities",
|
||||
"type": "Query",
|
||||
"description": "Search across all entity types",
|
||||
"arguments": [
|
||||
{
|
||||
"name": "input",
|
||||
"type": {
|
||||
"kind": "NON_NULL",
|
||||
"ofType": {
|
||||
"name": "SearchInput",
|
||||
"kind": "INPUT_OBJECT"
|
||||
}
|
||||
},
|
||||
"required": true,
|
||||
"description": "Search input parameters"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"mutations": [...]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Example JSON output for `--describe searchAcrossEntities --recurse --format json`:**
|
||||
|
||||
```json
|
||||
{
|
||||
"operation": {
|
||||
"name": "searchAcrossEntities",
|
||||
"type": "Query",
|
||||
"description": "Search across all entity types",
|
||||
"arguments": [...]
|
||||
},
|
||||
"relatedTypes": {
|
||||
"SearchInput": {
|
||||
"name": "SearchInput",
|
||||
"kind": "INPUT_OBJECT",
|
||||
"fields": [
|
||||
{
|
||||
"name": "query",
|
||||
"type": {"name": "String", "kind": "SCALAR"},
|
||||
"description": "Search query string"
|
||||
},
|
||||
{
|
||||
"name": "filters",
|
||||
"type": {"name": "SearchFilter", "kind": "INPUT_OBJECT"},
|
||||
"description": "Optional filters"
|
||||
}
|
||||
]
|
||||
},
|
||||
"SearchFilter": {...},
|
||||
"FilterOperator": {
|
||||
"name": "FilterOperator",
|
||||
"kind": "ENUM",
|
||||
"values": [
|
||||
{
|
||||
"name": "EQUAL",
|
||||
"description": "Represents the relation: field = value",
|
||||
"deprecated": false
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"meta": {
|
||||
"query": "searchAcrossEntities",
|
||||
"recursive": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 8. Custom Schema Path
|
||||
|
||||
Use `--schema-path` to read the schema from local files when introspection is disabled or during local development:
|
||||
|
||||
```shell
|
||||
# Use local GraphQL schema files
|
||||
datahub graphql --list-operations --schema-path ./local-schemas/
|
||||
|
||||
# Describe with custom schema
|
||||
datahub graphql --describe searchAcrossEntities --schema-path ./graphql-schemas/
|
||||
|
||||
# Get JSON format with custom schema
|
||||
datahub graphql --list-operations --schema-path ./schemas/ --format json
|
||||
```
|
||||
|
||||
## Command Reference
|
||||
|
||||
### Global Options
|
||||
|
||||
| Option | Type | Description |
|
||||
| ------------------- | ------ | -------------------------------------------------------------- |
|
||||
| `--query` | string | GraphQL query/mutation string or path to .graphql file |
|
||||
| `--variables` | string | Variables as JSON string or path to .json file |
|
||||
| `--operation` | string | Execute named operation from DataHub's schema |
|
||||
| `--describe` | string | Describe operation or type (searches both) |
|
||||
| `--recurse` | flag | Recursively explore nested types with --describe |
|
||||
| `--list-operations` | flag | List all available operations |
|
||||
| `--list-queries` | flag | List available query operations |
|
||||
| `--list-mutations` | flag | List available mutation operations |
|
||||
| `--schema-path` | string | Path to GraphQL schema files directory |
|
||||
| `--no-pretty` | flag | Disable pretty-printing of JSON output (default: pretty-print) |
|
||||
| `--format` | choice | Output format: `human` (default) or `json` for LLM consumption |
|
||||
|
||||
### Usage Patterns
|
||||
|
||||
```shell
|
||||
# Discovery
|
||||
datahub graphql --list-operations
|
||||
datahub graphql --describe <name> [--recurse]
|
||||
|
||||
# Execution
|
||||
datahub graphql --operation <name> [--variables <json>]
|
||||
datahub graphql --query <graphql> [--variables <json>]
|
||||
```
|
||||
|
||||
## Advanced Examples
|
||||
|
||||
### Complex Search with Filters
|
||||
|
||||
```shell
|
||||
datahub graphql --operation searchAcrossEntities --variables '{
|
||||
"input": {
|
||||
"query": "customer",
|
||||
"types": ["DATASET", "DASHBOARD"],
|
||||
"filters": [{
|
||||
"field": "platform",
|
||||
"values": ["mysql", "postgres"]
|
||||
}],
|
||||
"start": 0,
|
||||
"count": 20
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
### Adding a Tag to an Entity
|
||||
|
||||
```shell
|
||||
# Add Important tag to a dataset
|
||||
datahub graphql --query 'mutation AddTag($input: TagAssociationInput!) {
|
||||
addTag(input: $input)
|
||||
}' --variables '{
|
||||
"input": {
|
||||
"resourceUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,db.users,PROD)",
|
||||
"tagUrn": "urn:li:tag:Important"
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
### Batch User Queries
|
||||
|
||||
```shell
|
||||
# Get multiple users using raw GraphQL
|
||||
datahub graphql --query 'query GetUsers($urns: [String!]!) {
|
||||
users: batchGet(urns: $urns) {
|
||||
... on CorpUser {
|
||||
urn
|
||||
username
|
||||
properties {
|
||||
email
|
||||
displayName
|
||||
}
|
||||
}
|
||||
}
|
||||
}' --variables '{"urns": ["urn:li:corpuser:alice", "urn:li:corpuser:bob"]}'
|
||||
```
|
||||
|
||||
## Schema Introspection
|
||||
|
||||
DataHub's GraphQL CLI provides two modes for schema discovery:
|
||||
|
||||
### Schema Discovery Modes
|
||||
|
||||
1. **Live Introspection** (default): Queries the live GraphQL endpoint when no `--schema-path` is provided
|
||||
2. **Local Schema Files**: Uses `.graphql` files from the specified directory when `--schema-path` is provided
|
||||
|
||||
**Note:** These modes are mutually exclusive with no fallback between them. If introspection fails, the command will fail with an error. If local schema files are invalid, the command will fail with an error.
|
||||
|
||||
### Schema File Structure
|
||||
|
||||
When using `--schema-path`, the directory should contain `.graphql` files that define the available operations on the `Query` and `Mutation` types, for example:
|
||||
|
||||
```graphql
|
||||
# queries.graphql
|
||||
extend type Query {
|
||||
me: AuthenticatedUser
|
||||
searchAcrossEntities(input: SearchInput!): SearchResults
|
||||
}
|
||||
|
||||
# mutations.graphql
|
||||
extend type Mutation {
|
||||
addTag(input: TagAssociationInput!): String
|
||||
deleteEntity(urn: String!): String
|
||||
}
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
The CLI provides clear error messages for common issues:
|
||||
|
||||
```shell
|
||||
# Operation not found
|
||||
datahub graphql --describe nonExistentOp
|
||||
# Error: 'nonExistentOp' not found as an operation or type. Use --list-operations to see available operations or try a specific type name.
|
||||
|
||||
# Missing required arguments
|
||||
datahub graphql --operation searchAcrossEntities
|
||||
# Error: Operation 'searchAcrossEntities' requires arguments: input. Provide them using --variables '{"input": "value", ...}'
|
||||
|
||||
# Invalid JSON variables
|
||||
datahub graphql --operation me --variables '{invalid json}'
|
||||
# Error: Invalid JSON in variables: Expecting property name enclosed in double quotes
|
||||
```
|
||||
|
||||
## Output Formats
|
||||
|
||||
### Pretty Printing (Default)
|
||||
|
||||
```json
|
||||
{
|
||||
"me": {
|
||||
"corpUser": {
|
||||
"urn": "urn:li:corpuser:datahub",
|
||||
"username": "datahub"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Compact Output
|
||||
|
||||
```shell
|
||||
datahub graphql --operation me --no-pretty
|
||||
{"me":{"corpUser":{"urn":"urn:li:corpuser:datahub","username":"datahub"}}}
|
||||
```
|
||||
|
||||
## Integration Examples
|
||||
|
||||
### Shell Scripts
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# Get all datasets for a platform
|
||||
PLATFORM="mysql"
|
||||
RESULTS=$(datahub graphql --operation searchAcrossEntities --variables "{
|
||||
\"input\": {
|
||||
\"query\": \"*\",
|
||||
\"types\": [\"DATASET\"],
|
||||
\"filters\": [{\"field\": \"platform\", \"values\": [\"$PLATFORM\"]}]
|
||||
}
|
||||
}" --no-pretty)
|
||||
|
||||
echo "Found $(echo "$RESULTS" | jq '.searchAcrossEntities.total') datasets"
|
||||
```
|
||||
|
||||
### CI/CD Pipelines
|
||||
|
||||
```yaml
|
||||
# GitHub Actions example
|
||||
- name: Tag Important Datasets
|
||||
run: |
|
||||
datahub graphql --operation addTag --variables '{
|
||||
"input": {
|
||||
"resourceUrn": "${{ env.DATASET_URN }}",
|
||||
"tagUrn": "urn:li:tag:Production"
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
## LLM Integration
|
||||
|
||||
The `--format json` option makes the CLI perfect for LLM integration:
|
||||
|
||||
### Benefits for AI Assistants
|
||||
|
||||
1. **Schema Understanding**: LLMs can parse the complete GraphQL schema structure
|
||||
2. **Query Generation**: AI can generate accurate GraphQL queries based on available operations
|
||||
3. **Type Validation**: LLMs understand required vs optional arguments and their types
|
||||
4. **Documentation**: Rich descriptions and examples help AI provide better user assistance
|
||||
|
||||
### Use Cases
|
||||
|
||||
```shell
|
||||
# AI assistant gets complete schema knowledge
|
||||
datahub graphql --list-operations --format json | ai-assistant process-schema
|
||||
|
||||
# Generate queries for user requests
|
||||
datahub graphql --describe searchAcrossEntities --recurse --format json | ai-helper generate-query --user-intent "find mysql tables"
|
||||
|
||||
# Validate user input against schema
|
||||
datahub graphql --describe createGroup --format json | validate-user-input
|
||||
```
|
||||
|
||||
### JSON Schema Benefits
|
||||
|
||||
- **Structured data**: No parsing of human-readable text required
|
||||
- **Complete type information**: Includes GraphQL type wrappers (NON_NULL, LIST)
|
||||
- **Rich metadata**: Descriptions, deprecation info, argument requirements
|
||||
- **Consistent format**: Predictable structure across all operations and types
|
||||
- **Recursive exploration**: Complete dependency graphs for complex types
|
||||
|
||||
## Tips and Best Practices
|
||||
|
||||
1. **Start with Discovery**: Use `--list-operations` and `--describe` to understand available operations
|
||||
2. **Use --recurse**: When learning about complex operations, `--describe --recurse` shows the complete type structure
|
||||
3. **LLM Integration**: Use `--format json` when building AI assistants or automation tools
|
||||
4. **File-based Variables**: For complex variables, use JSON files instead of inline JSON
|
||||
5. **Error Handling**: The CLI provides detailed error messages - read them carefully for debugging
|
||||
6. **Schema Evolution**: Operations and types can change between DataHub versions - use discovery commands to stay current
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
**"Introspection not available"**: Use `--schema-path` to point to local GraphQL schema files
|
||||
|
||||
**"Operation not found"**: Check spelling and use `--list-operations` to see available operations
|
||||
|
||||
**"Type not found"**: Verify type name casing (GraphQL types are case-sensitive)
|
||||
|
||||
**Environment issues**: Ensure the DataHub server is running and accessible at the configured endpoint
|
||||
39
docs/cli.md
39
docs/cli.md
@ -67,6 +67,7 @@ Commands:
|
||||
exists A group of commands to check existence of entities in DataHub.
|
||||
forms A group of commands to interact with forms in DataHub.
|
||||
get A group of commands to get metadata from DataHub.
|
||||
graphql Execute GraphQL queries and mutations against DataHub.
|
||||
group A group of commands to interact with the Group entity in DataHub.
|
||||
ingest Ingest metadata into DataHub.
|
||||
init Configure which datahub instance to connect to
|
||||
@ -495,6 +496,44 @@ $ datahub get --urn "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,
|
||||
}
|
||||
```
|
||||
|
||||
### graphql
|
||||
|
||||
The `graphql` command allows you to execute GraphQL queries and mutations against DataHub's GraphQL API. This provides full access to DataHub's metadata through its native GraphQL interface.
|
||||
|
||||
```shell
|
||||
# Execute a GraphQL query
|
||||
datahub graphql --query "query { me { username } }"
|
||||
|
||||
# Use named operations from DataHub's schema
|
||||
datahub graphql --operation searchAcrossEntities --variables '{"input": {"query": "users"}}'
|
||||
|
||||
# List available operations
|
||||
datahub graphql --list-operations
|
||||
|
||||
# Get help for a specific operation
|
||||
datahub graphql --describe searchAcrossEntities
|
||||
|
||||
# Explore types recursively
|
||||
datahub graphql --describe SearchInput --recurse
|
||||
|
||||
# Load queries and variables from files
|
||||
datahub graphql --query ./search-tags.graphql --variables ./search-params.json
|
||||
|
||||
# Get JSON output for LLM integration
|
||||
datahub graphql --list-operations --format json
|
||||
```
|
||||
|
||||
The GraphQL command supports both raw GraphQL queries/mutations and operation-based execution using DataHub's introspected schema. It automatically detects whether `--query` and `--variables` arguments are file paths or literal content, enabling seamless use of both inline GraphQL and file-based queries.
|
||||
|
||||
Key features:
|
||||
|
||||
- **Schema discovery**: List and describe all available operations and types
|
||||
- **File support**: Load queries and variables from `.graphql` and `.json` files
|
||||
- **LLM-friendly output**: JSON format with complete type information
|
||||
- **Recursive exploration**: Deep-dive into complex GraphQL types
|
||||
|
||||
➡️ [Learn more about the GraphQL command](./cli-commands/graphql.md)
|
||||
|
||||
### put
|
||||
|
||||
The `put` group of commands allows you to write metadata into DataHub. This is a flexible way for you to issue edits to metadata from the command line.
|
||||
|
||||
1422
metadata-ingestion/src/datahub/cli/graphql_cli.py
Normal file
1422
metadata-ingestion/src/datahub/cli/graphql_cli.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -22,6 +22,7 @@ from datahub.cli.docker_cli import docker
|
||||
from datahub.cli.env_utils import get_boolean_env_variable
|
||||
from datahub.cli.exists_cli import exists
|
||||
from datahub.cli.get_cli import get
|
||||
from datahub.cli.graphql_cli import graphql
|
||||
from datahub.cli.ingest_cli import ingest
|
||||
from datahub.cli.migrate import migrate
|
||||
from datahub.cli.put_cli import put
|
||||
@ -169,6 +170,7 @@ datahub.add_command(ingest)
|
||||
datahub.add_command(delete)
|
||||
datahub.add_command(exists)
|
||||
datahub.add_command(get)
|
||||
datahub.add_command(graphql)
|
||||
datahub.add_command(put)
|
||||
datahub.add_command(state)
|
||||
datahub.add_command(telemetry_cli)
|
||||
|
||||
2451
metadata-ingestion/tests/unit/cli/test_graphql_cli.py
Normal file
2451
metadata-ingestion/tests/unit/cli/test_graphql_cli.py
Normal file
File diff suppressed because it is too large
Load Diff
0
smoke-test/tests/cli/graphql_cmd/__init__.py
Normal file
0
smoke-test/tests/cli/graphql_cmd/__init__.py
Normal file
39
smoke-test/tests/cli/graphql_cmd/sample_queries.graphql
Normal file
39
smoke-test/tests/cli/graphql_cmd/sample_queries.graphql
Normal file
@ -0,0 +1,39 @@
|
||||
# Sample GraphQL operations used by the CLI smoke tests

# Minimal introspection: fetch only the name of the root query type
query IntrospectionQuery {
  __schema {
    queryType { name }
  }
}

# Search datasets with a wildcard query, returning the first five results
query SearchEntities {
  search(input: { type: DATASET, query: "*", start: 0, count: 5 }) {
    start
    count
    total
    searchResults {
      entity {
        urn
        type
      }
    }
  }
}

# List the available platforms
query ListPlatforms {
  listPlatforms {
    name
    displayName
    type
  }
}
|
||||
4
smoke-test/tests/cli/graphql_cmd/sample_queries.json
Normal file
4
smoke-test/tests/cli/graphql_cmd/sample_queries.json
Normal file
@ -0,0 +1,4 @@
|
||||
{
|
||||
"query": "{ __schema { queryType { name } } }",
|
||||
"variables": {}
|
||||
}
|
||||
335
smoke-test/tests/cli/graphql_cmd/test_graphql_cli_smoke.py
Normal file
335
smoke-test/tests/cli/graphql_cmd/test_graphql_cli_smoke.py
Normal file
@ -0,0 +1,335 @@
|
||||
"""
|
||||
Smoke tests for DataHub GraphQL CLI functionality.
|
||||
|
||||
These tests validate the core GraphQL CLI features including:
|
||||
- Schema discovery and introspection
|
||||
- File-based query execution with relative paths
|
||||
- JSON output formatting
|
||||
- CLI integration with DataHub instances
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from tests.utils import run_datahub_cmd, wait_for_healthcheck_util
|
||||
|
||||
|
||||
class TestGraphQLCLIStandalone:
    """Fast standalone tests that don't require full DataHub functionality.

    These tests only check that the CLI accepts its arguments and fails
    gracefully, so wherever the command would need a server both exit code 0
    and exit code 1 are accepted.
    """

    def setup_method(self):
        """Snapshot the environment and provide default connection settings."""
        self.original_env = os.environ.copy()
        # setdefault leaves any connection info that is already exported intact.
        os.environ.setdefault("DATAHUB_GMS_HOST", "http://localhost:8080")
        os.environ.setdefault("DATAHUB_GMS_TOKEN", "")

    def teardown_method(self):
        """Restore the environment captured by setup_method."""
        os.environ.clear()
        os.environ.update(self.original_env)

    def _run_datahub_cli(
        self, args: list[str], input_data: Optional[str] = None
    ) -> tuple[int, str, str]:
        """
        Run datahub CLI command and return (exit_code, stdout, stderr).

        Args:
            args: CLI arguments (e.g., ['graphql', '--list-operations'])
            input_data: Optional stdin input

        Returns:
            Tuple of (exit_code, stdout, stderr)
        """
        result = run_datahub_cmd(args, input=input_data)
        return result.exit_code, result.stdout, result.stderr

    def test_graphql_help(self):
        """Test that GraphQL CLI help is accessible."""
        exit_code, stdout, stderr = self._run_datahub_cli(["graphql", "--help"])

        assert exit_code == 0, f"CLI help failed with stderr: {stderr}"
        assert "GraphQL" in stdout or "graphql" in stdout
        assert "--list-operations" in stdout or "--schema-path" in stdout
        assert "--query" in stdout

    def test_graphql_schema_discovery(self):
        """Test GraphQL schema discovery functionality."""
        # This should work even without authentication for schema discovery
        exit_code, stdout, stderr = self._run_datahub_cli(
            ["graphql", "--list-operations"]
        )

        # Command may exit with error (no DataHub connection) but should not crash
        assert exit_code in [0, 1], f"Unexpected exit code. stderr: {stderr}"
        assert "Traceback" not in stderr  # No Python crashes
        # Fails only when stderr reports the graphql command itself as missing.
        assert (
            "graphql" not in stderr.lower() or "command not found" not in stderr.lower()
        )

    def test_graphql_file_path_handling(self):
        """Test that GraphQL CLI properly handles file path arguments."""
        # delete=False so the file still exists after the handle is closed;
        # it is removed explicitly in the finally block below.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".graphql", delete=False
        ) as f:
            f.write("{ __typename }")
            temp_path = f.name

        try:
            # Test with absolute path
            exit_code, stdout, stderr = self._run_datahub_cli(
                ["graphql", "--query", temp_path, "--format", "json"]
            )

            # Should recognize as file path (may fail due to missing DataHub connection)
            assert exit_code in [0, 1], (
                f"Unexpected exit code with file path. stderr: {stderr}"
            )
            assert "No such file or directory" not in stderr
            assert "FileNotFoundError" not in stderr

        finally:
            os.unlink(temp_path)

    def test_graphql_relative_path_handling(self):
        """Test that GraphQL CLI handles relative paths correctly."""
        # Create a temporary GraphQL file in a subdirectory
        with tempfile.TemporaryDirectory() as temp_dir:
            # Resolve symlinks first: os.getcwd() reports the physical directory
            # after chdir, so relpath() must start from the same physical path
            # to yield a plain "<subdir>/test.graphql" relative path.
            temp_path = Path(temp_dir).resolve()
            query_file = temp_path / "test.graphql"
            query_file.write_text("{ __typename }")

            # Change to parent directory and use relative path
            original_cwd = os.getcwd()
            try:
                os.chdir(temp_path.parent)
                relative_path = os.path.relpath(str(query_file))

                exit_code, stdout, stderr = self._run_datahub_cli(
                    ["graphql", "--query", relative_path, "--format", "json"]
                )

                # Should recognize relative path (may fail due to missing DataHub connection)
                assert exit_code in [0, 1], (
                    f"Relative path handling failed. stderr: {stderr}"
                )
                assert "No such file or directory" not in stderr
                assert "FileNotFoundError" not in stderr

            finally:
                # Leave the temp dir before TemporaryDirectory cleans it up.
                os.chdir(original_cwd)
|
||||
|
||||
|
||||
class TestGraphQLCLIIntegration:
    """Integration tests requiring full DataHub functionality."""

    @pytest.fixture(autouse=True)
    def setup_datahub(self, auth_session):
        """Ensure DataHub is running and accessible.

        Stores ``auth_session`` on the instance so the helper below can read
        the GMS URL and token from it.
        """
        self.auth_session = auth_session
        wait_for_healthcheck_util(requests)

    def _run_authenticated_graphql(self, args: list[str]) -> tuple[int, str, str]:
        """Run GraphQL CLI with proper authentication.

        Args:
            args: CLI arguments, starting with the ``graphql`` sub-command.

        Returns:
            Tuple of (exit_code, stdout, stderr)
        """
        result = run_datahub_cmd(
            args,
            env={
                "DATAHUB_GMS_URL": self.auth_session.gms_url(),
                "DATAHUB_GMS_TOKEN": self.auth_session.gms_token(),
            },
        )
        return result.exit_code, result.stdout, result.stderr

    def test_graphql_schema_introspection(self):
        """Test GraphQL schema introspection with authentication."""
        exit_code, stdout, stderr = self._run_authenticated_graphql(
            ["graphql", "--list-operations", "--format", "json"]
        )

        assert exit_code == 0, f"Schema introspection failed: {stderr}"

        # Only output that looks like JSON is validated; empty or
        # human-readable output is accepted as-is.
        if stdout.strip().startswith(("{", "[")):
            try:
                schema_data = json.loads(stdout)
            except json.JSONDecodeError as e:
                pytest.fail(f"Schema output is not valid JSON: {e}")
            assert isinstance(schema_data, (dict, list))

    def test_graphql_simple_query_execution(self):
        """Test execution of a simple GraphQL query."""
        simple_query = "{ __typename }"

        exit_code, stdout, stderr = self._run_authenticated_graphql(
            ["graphql", "--query", simple_query, "--format", "json"]
        )

        assert exit_code == 0, f"Simple query execution failed: {stderr}"

        # Parse JSON output
        try:
            result = json.loads(stdout)
        except json.JSONDecodeError as e:
            pytest.fail(f"Query result is not valid JSON: {e}")

        assert isinstance(result, dict)
        # Any non-empty object is accepted: either a "data" envelope or the
        # direct response payload.
        assert len(result) > 0

    def test_graphql_query_from_file(self):
        """Test GraphQL query execution from a file given by absolute path."""
        query_content = """{
            __typename
        }"""

        # delete=False so the file still exists after the handle is closed;
        # it is removed explicitly in the finally block below.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".graphql", delete=False
        ) as f:
            f.write(query_content)
            temp_path = f.name

        try:
            # Test with absolute path
            exit_code, stdout, stderr = self._run_authenticated_graphql(
                ["graphql", "--query", temp_path, "--format", "json"]
            )

            assert exit_code == 0, f"File query execution failed: {stderr}"

            # Parse JSON output
            try:
                result = json.loads(stdout)
            except json.JSONDecodeError as e:
                pytest.fail(f"File query result is not valid JSON: {e}")

            assert isinstance(result, dict)
            assert len(result) > 0

        finally:
            os.unlink(temp_path)

    def test_graphql_list_operations(self):
        """Test GraphQL CLI list operations functionality."""
        for operation_type in ["queries", "mutations"]:
            exit_code, stdout, stderr = self._run_authenticated_graphql(
                ["graphql", f"--list-{operation_type}", "--format", "json"]
            )

            assert exit_code == 0, f"List {operation_type} failed: {stderr}"

            # Empty output is acceptable (might be an empty list)
            if not stdout.strip():
                continue

            try:
                result = json.loads(stdout)
            except json.JSONDecodeError:
                # Some operations might produce non-JSON output, which is also acceptable
                continue

            assert isinstance(result, (list, dict))

    def test_graphql_json_output_format(self):
        """Test that JSON output format is properly structured."""
        exit_code, stdout, stderr = self._run_authenticated_graphql(
            ["graphql", "--query", "{ __typename }", "--format", "json"]
        )

        assert exit_code == 0, f"JSON output test failed: {stderr}"

        # Verify JSON structure
        try:
            result = json.loads(stdout)
        except json.JSONDecodeError as e:
            pytest.fail(f"JSON output is malformed: {e}")

        assert isinstance(result, dict)

        # Should follow GraphQL response format or be direct data
        if "data" in result:
            assert isinstance(result["data"], (dict, type(None)))
        if "errors" in result:
            assert isinstance(result["errors"], list)

    def test_graphql_error_handling(self):
        """Test GraphQL CLI error handling with invalid queries."""
        invalid_query = "{ invalidField { doesNotExist } }"

        exit_code, stdout, stderr = self._run_authenticated_graphql(
            ["graphql", "--query", invalid_query, "--format", "json"]
        )

        # Should handle errors gracefully
        assert exit_code in [0, 1], (
            f"Error handling failed with unexpected exit code. stderr: {stderr}"
        )

        if exit_code == 0:
            # If successful, should contain error information in GraphQL response
            try:
                result = json.loads(stdout)
            except json.JSONDecodeError:
                pytest.fail("Invalid query should produce structured error response")

            # GraphQL errors should be in the response
            assert "errors" in result or "data" in result
        else:
            # If exit code 1, should have meaningful error message
            assert len(stderr.strip()) > 0, "Should provide error message on failure"
|
||||
|
||||
|
||||
class TestGraphQLCLIFileHandling:
    """Checks that file arguments passed to the CLI are located correctly."""

    def test_json_file_detection(self):
        """A ``.json`` file given to ``--query`` must not trigger file-not-found errors."""
        payload = {"query": "{ __typename }"}

        # delete=False keeps the file on disk once the handle closes;
        # it is removed in the finally block.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False
        ) as handle:
            handle.write(json.dumps(payload))
            json_path = handle.name

        try:
            # Only stderr is inspected; the exit code is not checked here.
            cli_stderr = run_datahub_cmd(["graphql", "--query", json_path]).stderr

            for marker in ("No such file or directory", "FileNotFoundError"):
                assert marker not in cli_stderr
        finally:
            os.unlink(json_path)

    def test_relative_path_resolution(self):
        """A ``./``-relative query file must be found from the working directory."""
        with tempfile.TemporaryDirectory() as workdir:
            (Path(workdir) / "query.graphql").write_text("{ __typename }")

            previous_cwd = os.getcwd()
            try:
                # Run from inside the temp dir so the relative path resolves.
                os.chdir(workdir)

                cli_stderr = run_datahub_cmd(
                    ["graphql", "--query", "./query.graphql"]
                ).stderr

                for marker in ("No such file or directory", "FileNotFoundError"):
                    assert marker not in cli_stderr
            finally:
                os.chdir(previous_cwd)
|
||||
Loading…
x
Reference in New Issue
Block a user