mirror of
https://github.com/datahub-project/datahub.git
synced 2025-09-26 01:23:16 +00:00
1 line
48 KiB
JavaScript
1 line
48 KiB
JavaScript
"use strict";(self.webpackChunkdocs_website=self.webpackChunkdocs_website||[]).push([[4858],{86342:(e,a,t)=>{t.r(a),t.d(a,{assets:()=>c,contentTitle:()=>d,default:()=>u,frontMatter:()=>p,metadata:()=>g,toc:()=>m});t(96540);var n=t(15680),l=t(53720),i=t(5400);function s(e,a,t){return a in e?Object.defineProperty(e,a,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[a]=t,e}function r(e,a){return a=null!=a?a:{},Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(a)):function(e,a){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);a&&(n=n.filter((function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable}))),t.push.apply(t,n)}return t}(Object(a)).forEach((function(t){Object.defineProperty(e,t,Object.getOwnPropertyDescriptor(a,t))})),e}function o(e,a){if(null==e)return{};var t,n,l=function(e,a){if(null==e)return{};var t,n,l={},i=Object.keys(e);for(n=0;n<i.length;n++)t=i[n],a.indexOf(t)>=0||(l[t]=e[t]);return l}(e,a);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n<i.length;n++)t=i[n],a.indexOf(t)>=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(l[t]=e[t])}return l}const p={sidebar_position:6,title:"Cassandra",slug:"/generated/ingestion/sources/cassandra",custom_edit_url:"https://github.com/datahub-project/datahub/blob/master/docs/generated/ingestion/sources/cassandra.md"},d="Cassandra",g={unversionedId:"docs/generated/ingestion/sources/cassandra",id:"docs/generated/ingestion/sources/cassandra",title:"Cassandra",description:"Incubating",source:"@site/genDocs/docs/generated/ingestion/sources/cassandra.md",sourceDirName:"docs/generated/ingestion/sources",slug:"/generated/ingestion/sources/cassandra",permalink:"/docs/generated/ingestion/sources/cassandra",draft:!1,editUrl:"https://github.com/datahub-project/datahub/blob/master/docs/generated/ingestion/sources/cassandra.md",tags:[],version:"current",sidebarPosition:6,frontMatter:{sidebar_position:6,title:"Cassandra",slug:"/generated/ingestion/sources/cassandra",custom_edit_url:"https://github.com/datahub-project/datahub/blob/master/docs/generated/ingestion/sources/cassandra.md"},sidebar:"overviewSidebar",previous:{title:"Business Glossary",permalink:"/docs/generated/ingestion/sources/business-glossary"},next:{title:"ClickHouse",permalink:"/docs/generated/ingestion/sources/clickhouse"}},c={},m=[{value:"Important Capabilities",id:"important-capabilities",level:3},{value:"Setup",id:"setup",level:3},{value:"Steps to Get the Required Information",id:"steps-to-get-the-required-information",level:4},{value:"CLI based Ingestion",id:"cli-based-ingestion",level:3},{value:"Starter Recipe",id:"starter-recipe",level:3},{value:"Config Details",id:"config-details",level:3},{value:"Code Coordinates",id:"code-coordinates",level:3}],f={toc:m},y="wrapper";function u(e){var{components:a}=e,t=o(e,["components"]);return(0,n.yg)(y,r(function(e){for(var a=1;a<arguments.length;a++){var t=null!=arguments[a]?arguments[a]:{},n=Object.keys(t);"function"==typeof Object.getOwnPropertySymbols&&(n=n.concat(Object.getOwnPropertySymbols(t).filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable})))),n.forEach((function(a){s(e,a,t[a])}))}return e}({},f,t),{components:a,mdxType:"MDXLayout"}),(0,n.yg)("h1",{id:"cassandra"},"Cassandra"),(0,n.yg)("p",null,(0,n.yg)("img",{parentName:"p",src:"https://img.shields.io/badge/support%20status-incubating-blue",alt:"Incubating"})),(0,n.yg)("h3",{id:"important-capabilities"},"Important Capabilities"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:null},"Capability"),(0,n.yg)("th",{parentName:"tr",align:null},"Status"),(0,n.yg)("th",{parentName:"tr",align:null},"Notes"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Asset Containers"),(0,n.yg)("td",{parentName:"tr",align:null},"\u2705"),(0,n.yg)("td",{parentName:"tr",align:null},"Enabled by default.")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("a",{parentName:"td",href:"/docs/metadata-ingestion/docs/dev_guides/stateful#stale-entity-removal"},"Detect Deleted Entities")),(0,n.yg)("td",{parentName:"tr",align:null},"\u2705"),(0,n.yg)("td",{parentName:"tr",align:null},"Enabled by default via stateful ingestion.")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},(0,n.yg)("a",{parentName:"td",href:"/docs/platform-instances"},"Platform Instance")),(0,n.yg)("td",{parentName:"tr",align:null},"\u2705"),(0,n.yg)("td",{parentName:"tr",align:null},"Enabled by default.")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Schema Metadata"),(0,n.yg)("td",{parentName:"tr",align:null},"\u2705"),(0,n.yg)("td",{parentName:"tr",align:null},"Enabled by default.")))),(0,n.yg)("p",null,"This plugin extracts the following:"),(0,n.yg)("ul",null,(0,n.yg)("li",{parentName:"ul"},"Metadata for tables"),(0,n.yg)("li",{parentName:"ul"},"Column types associated with each table column"),(0,n.yg)("li",{parentName:"ul"},"The keyspace each table belongs to")),(0,n.yg)("h3",{id:"setup"},"Setup"),(0,n.yg)("p",null,"This integration pulls metadata directly from Cassandra databases, including both ",(0,n.yg)("strong",{parentName:"p"},"DataStax Astra DB")," and ",(0,n.yg)("strong",{parentName:"p"},"Cassandra Enterprise Edition (EE)"),"."),(0,n.yg)("p",null,"You\u2019ll need to have a Cassandra instance or an Astra DB setup with appropriate access permissions."),(0,n.yg)("h4",{id:"steps-to-get-the-required-information"},"Steps to Get the Required Information"),(0,n.yg)("ol",null,(0,n.yg)("li",{parentName:"ol"},(0,n.yg)("p",{parentName:"li"},(0,n.yg)("strong",{parentName:"p"},"Set Up User Credentials"),":"),(0,n.yg)("ul",{parentName:"li"},(0,n.yg)("li",{parentName:"ul"},(0,n.yg)("strong",{parentName:"li"},"For Astra DB"),":",(0,n.yg)("ul",{parentName:"li"},(0,n.yg)("li",{parentName:"ul"},"Log in to your Astra DB Console."),(0,n.yg)("li",{parentName:"ul"},"Navigate to ",(0,n.yg)("strong",{parentName:"li"},"Organization Settings")," > ",(0,n.yg)("strong",{parentName:"li"},"Token Management"),"."),(0,n.yg)("li",{parentName:"ul"},"Generate an ",(0,n.yg)("strong",{parentName:"li"},"Application Token")," with the required permissions for read access."),(0,n.yg)("li",{parentName:"ul"},"Download the ",(0,n.yg)("strong",{parentName:"li"},"Secure Connect Bundle")," from the Astra DB Console."))),(0,n.yg)("li",{parentName:"ul"},(0,n.yg)("strong",{parentName:"li"},"For Cassandra EE"),":",(0,n.yg)("ul",{parentName:"li"},(0,n.yg)("li",{parentName:"ul"},"Ensure you have a ",(0,n.yg)("strong",{parentName:"li"},"username")," and ",(0,n.yg)("strong",{parentName:"li"},"password")," with read access to the necessary keyspaces."))))),(0,n.yg)("li",{parentName:"ol"},(0,n.yg)("p",{parentName:"li"},(0,n.yg)("strong",{parentName:"p"},"Permissions"),":"),(0,n.yg)("ul",{parentName:"li"},(0,n.yg)("li",{parentName:"ul"},"The user or token must have ",(0,n.yg)("inlineCode",{parentName:"li"},"SELECT")," permissions that allow it to:",(0,n.yg)("ul",{parentName:"li"},(0,n.yg)("li",{parentName:"ul"},"Access metadata in system keyspaces (e.g., ",(0,n.yg)("inlineCode",{parentName:"li"},"system_schema"),") to retrieve information about keyspaces, tables, columns, and views."),(0,n.yg)("li",{parentName:"ul"},"Perform ",(0,n.yg)("inlineCode",{parentName:"li"},"SELECT")," operations on the data tables if data profiling is enabled."))))),(0,n.yg)("li",{parentName:"ol"},(0,n.yg)("p",{parentName:"li"},(0,n.yg)("strong",{parentName:"p"},"Verify Database Access"),":"),(0,n.yg)("ul",{parentName:"li"},(0,n.yg)("li",{parentName:"ul"},"For Astra DB: Ensure the ",(0,n.yg)("strong",{parentName:"li"},"Secure Connect Bundle")," is used and configured correctly."),(0,n.yg)("li",{parentName:"ul"},"For Cassandra Opensource: Ensure the ",(0,n.yg)("strong",{parentName:"li"},"contact point")," and ",(0,n.yg)("strong",{parentName:"li"},"port")," are accessible.")))),(0,n.yg)("admonition",{type:"caution"},(0,n.yg)("p",{parentName:"admonition"},"When enabling profiling, make sure to set a limit on the number of rows to sample. Profiling large tables without a limit may lead to excessive resource consumption and slow performance.")),(0,n.yg)("admonition",{type:"note"},(0,n.yg)("p",{parentName:"admonition"},"For cloud configuration with Astra DB, it is necessary to specify the Secure Connect Bundle path in the configuration. For that reason, use the CLI to ingest metadata into DataHub.")),(0,n.yg)("h3",{id:"cli-based-ingestion"},"CLI based Ingestion"),(0,n.yg)("h3",{id:"starter-recipe"},"Starter Recipe"),(0,n.yg)("p",null,"Check out the following recipe to get started with ingestion! See ",(0,n.yg)("a",{parentName:"p",href:"#config-details"},"below")," for full configuration options."),(0,n.yg)("p",null,"For general pointers on writing and running a recipe, see our ",(0,n.yg)("a",{parentName:"p",href:"/docs/metadata-ingestion#recipes"},"main recipe guide"),"."),(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-yaml"},'source:\n type: "cassandra"\n config:\n # Credentials for on prem cassandra\n contact_point: "localhost"\n port: 9042\n username: "admin"\n password: "password"\n\n # Or\n # Credentials Astra Cloud\n #cloud_config:\n # secure_connect_bundle: "Path to Secure Connect Bundle (.zip)"\n # token: "Application Token"\n\n # Optional Allow / Deny extraction of particular keyspaces.\n keyspace_pattern:\n allow: [".*"]\n\n # Optional Allow / Deny extraction of particular tables.\n table_pattern:\n allow: [".*"]\n\n # Optional \n profiling:\n enabled: true\n profile_table_level_only: true\n\nsink:\n # config sinks\n\n')),(0,n.yg)("h3",{id:"config-details"},"Config Details"),(0,n.yg)(l.A,{mdxType:"Tabs"},(0,n.yg)(i.A,{value:"options",label:"Options",default:!0,mdxType:"TabItem"},(0,n.yg)("p",null,"Note that a ",(0,n.yg)("inlineCode",{parentName:"p"},".")," is used to denote nested fields in the YAML recipe."),(0,n.yg)("div",{className:"config-table"},(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"left"},"Field"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Description"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"contact_point"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Domain or IP address of the Cassandra instance (excluding port). ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"localhost")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"keyspace_pattern"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"AllowDenyPattern"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Regex patterns to filter keyspaces for ingestion. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"{","'","allow","'",": ","[","'",".","*","'","]",", ","'","deny","'",": ","[","]",", ","'","ignoreCase","'",": True","}")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"password"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Password credential associated with the specified username.")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"platform_instance"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"The instance of the platform that all assets produced by this recipe belong to. This should be unique within the platform. See ",(0,n.yg)("a",{parentName:"td",href:"https://docs.datahub.com/docs/platform-instances/"},"https://docs.datahub.com/docs/platform-instances/")," for more details.")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"port"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"integer"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Port number to connect to the Cassandra instance. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"9042")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"profile_pattern"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"AllowDenyPattern"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Regex patterns for tables to profile ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"{","'","allow","'",": ","[","'",".","*","'","]",", ","'","deny","'",": ","[","]",", ","'","ignoreCase","'",": True","}")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"ssl_ca_certs"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Path to the CA certificate file for SSL connections.")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"ssl_certfile"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Path to the SSL certificate file for SSL connections.")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"ssl_keyfile"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Path to the SSL key file for SSL connections.")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"table_pattern"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"AllowDenyPattern"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Regex patterns to filter keyspaces.tables for ingestion. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"{","'","allow","'",": ","[","'",".","*","'","]",", ","'","deny","'",": ","[","]",", ","'","ignoreCase","'",": True","}")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"username"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Username credential with read access to the system_schema keyspace.")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"env"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"The environment that all assets produced by this connector belong to ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"PROD")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"cloud_config"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"CassandraCloudConfig"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Configuration for cloud-based Cassandra, such as DataStax Astra DB.")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"cloud_config."),(0,n.yg)("span",{className:"path-main"},"secure_connect_bundle"),"\xa0",(0,n.yg)("abbr",{title:"Required if cloud_config is set"},"\u2753"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"File path to the Secure Connect Bundle (.zip) used for a secure connection to DataStax Astra DB.")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"cloud_config."),(0,n.yg)("span",{className:"path-main"},"token"),"\xa0",(0,n.yg)("abbr",{title:"Required if cloud_config is set"},"\u2753"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"string"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"The Astra DB application token used for authentication.")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"cloud_config."),(0,n.yg)("span",{className:"path-main"},"connect_timeout"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"integer"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Timeout in seconds for establishing new connections to Cassandra. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"600")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"cloud_config."),(0,n.yg)("span",{className:"path-main"},"request_timeout"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"integer"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Timeout in seconds for individual Cassandra requests. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"600")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"profiling"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"GEProfilingBaseConfig"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Configuration for profiling ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"{","'","enabled","'",": False, ","'","operation","_","config","'",": ","{","'","lower","_","fre...")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling."),(0,n.yg)("span",{className:"path-main"},"enabled"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"boolean"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Whether profiling should be done. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"False")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling."),(0,n.yg)("span",{className:"path-main"},"include_field_distinct_count"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"boolean"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Whether to profile for the number of distinct values for each column. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"True")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling."),(0,n.yg)("span",{className:"path-main"},"include_field_distinct_value_frequencies"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"boolean"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Whether to profile for distinct value frequencies. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"False")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling."),(0,n.yg)("span",{className:"path-main"},"include_field_histogram"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"boolean"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Whether to profile for the histogram for numeric fields. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"False")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling."),(0,n.yg)("span",{className:"path-main"},"include_field_max_value"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"boolean"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Whether to profile for the max value of numeric columns. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"True")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling."),(0,n.yg)("span",{className:"path-main"},"include_field_mean_value"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"boolean"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Whether to profile for the mean value of numeric columns. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"True")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling."),(0,n.yg)("span",{className:"path-main"},"include_field_median_value"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"boolean"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Whether to profile for the median value of numeric columns. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"True")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling."),(0,n.yg)("span",{className:"path-main"},"include_field_min_value"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"boolean"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Whether to profile for the min value of numeric columns. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"True")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling."),(0,n.yg)("span",{className:"path-main"},"include_field_null_count"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"boolean"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Whether to profile for the number of nulls for each column. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"True")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling."),(0,n.yg)("span",{className:"path-main"},"include_field_quantiles"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"boolean"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Whether to profile for the quantiles of numeric columns. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"False")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling."),(0,n.yg)("span",{className:"path-main"},"include_field_sample_values"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"boolean"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Whether to profile for the sample values for all columns. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"True")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling."),(0,n.yg)("span",{className:"path-main"},"include_field_stddev_value"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"boolean"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Whether to profile for the standard deviation of numeric columns. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"True")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling."),(0,n.yg)("span",{className:"path-main"},"limit"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"integer"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Max number of documents to profile. By default, profiles all documents.")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling."),(0,n.yg)("span",{className:"path-main"},"max_workers"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"integer"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Number of worker threads to use for profiling. Set to 1 to disable. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"20")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling."),(0,n.yg)("span",{className:"path-main"},"offset"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"integer"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Offset in documents to profile. By default, uses no offset.")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling."),(0,n.yg)("span",{className:"path-main"},"profile_table_level_only"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"boolean"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Whether to perform profiling at table-level only, or include column-level profiling as well. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"False")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling."),(0,n.yg)("span",{className:"path-main"},"operation_config"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"OperationConfig"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Experimental feature. To specify operation configs.")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling.operation_config."),(0,n.yg)("span",{className:"path-main"},"lower_freq_profile_enabled"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"boolean"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Whether to do profiling at lower freq or not. This does not do any scheduling just adds additional checks to when not to run profiling. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"False")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling.operation_config."),(0,n.yg)("span",{className:"path-main"},"profile_date_of_month"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"integer"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Number between 1 to 31 for date of month (both inclusive). If not specified, defaults to Nothing and this field does not take affect.")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"profiling.operation_config."),(0,n.yg)("span",{className:"path-main"},"profile_day_of_week"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"integer"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Number between 0 to 6 for day of week (both inclusive). 0 is Monday and 6 is Sunday. If not specified, defaults to Nothing and this field does not take affect.")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-main"},"stateful_ingestion"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"StatefulStaleMetadataRemovalConfig"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Configuration for stateful ingestion and stale metadata removal.")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"stateful_ingestion."),(0,n.yg)("span",{className:"path-main"},"enabled"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"boolean"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Whether or not to enable stateful ingest. Default: True if a pipeline_name is set and either a datahub-rest sink or ",(0,n.yg)("inlineCode",{parentName:"td"},"datahub_api")," is specified, otherwise False ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"False")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"stateful_ingestion."),(0,n.yg)("span",{className:"path-main"},"fail_safe_threshold"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"number"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Prevents large amount of soft deletes & the state from committing from accidental changes to the source configuration if the relative change percent in entities compared to the previous state is above the 'fail_safe_threshold'. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"75.0")))),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},(0,n.yg)("div",{className:"path-line"},(0,n.yg)("span",{className:"path-prefix"},"stateful_ingestion."),(0,n.yg)("span",{className:"path-main"},"remove_stale_metadata"))," ",(0,n.yg)("div",{className:"type-name-line"},(0,n.yg)("span",{className:"type-name"},"boolean"))),(0,n.yg)("td",{parentName:"tr",align:"left"},"Soft-deletes the entities present in the last successful run but missing in the current run with stateful_ingestion enabled. ",(0,n.yg)("div",{className:"default-line default-line-with-docs"},"Default: ",(0,n.yg)("span",{className:"default-value"},"True")))))))),(0,n.yg)(i.A,{value:"schema",label:"Schema",mdxType:"TabItem"},(0,n.yg)("p",null,"The ",(0,n.yg)("a",{parentName:"p",href:"https://json-schema.org/"},"JSONSchema")," for this configuration is inlined below."),(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-javascript"},'{\n "title": "CassandraSourceConfig",\n "description": "Configuration for connecting to a Cassandra or DataStax Astra DB source.",\n "type": "object",\n "properties": {\n "env": {\n "title": "Env",\n "description": "The environment that all assets produced by this connector belong to",\n "default": "PROD",\n "type": "string"\n },\n "stateful_ingestion": {\n "title": "Stateful Ingestion",\n "description": "Configuration for stateful ingestion and stale metadata removal.",\n "allOf": [\n {\n "$ref": "#/definitions/StatefulStaleMetadataRemovalConfig"\n }\n ]\n },\n "platform_instance": {\n "title": "Platform Instance",\n "description": "The instance of the platform that all assets produced by this recipe belong to. This should be unique within the platform. See https://docs.datahub.com/docs/platform-instances/ for more details.",\n "type": "string"\n },\n "contact_point": {\n "title": "Contact Point",\n "description": "Domain or IP address of the Cassandra instance (excluding port).",\n "default": "localhost",\n "type": "string"\n },\n "port": {\n "title": "Port",\n "description": "Port number to connect to the Cassandra instance.",\n "default": 9042,\n "type": "integer"\n },\n "username": {\n "title": "Username",\n "description": "Username credential with read access to the system_schema keyspace.",\n "type": "string"\n },\n "password": {\n "title": "Password",\n "description": "Password credential associated with the specified username.",\n "type": "string"\n },\n "cloud_config": {\n "title": "Cloud Config",\n "description": "Configuration for cloud-based Cassandra, such as DataStax Astra DB.",\n "allOf": [\n {\n "$ref": "#/definitions/CassandraCloudConfig"\n }\n ]\n },\n "ssl_ca_certs": {\n "title": "Ssl Ca Certs",\n "description": "Path to the CA certificate file for SSL connections.",\n "type": "string"\n },\n "ssl_certfile": {\n "title": "Ssl Certfile",\n "description": "Path to the SSL certificate file for SSL connections.",\n "type": "string"\n },\n "ssl_keyfile": {\n "title": "Ssl Keyfile",\n "description": "Path to the SSL key file for SSL connections.",\n "type": "string"\n },\n "keyspace_pattern": {\n "title": "Keyspace Pattern",\n "description": "Regex patterns to filter keyspaces for ingestion.",\n "default": {\n "allow": [\n ".*"\n ],\n "deny": [],\n "ignoreCase": true\n },\n "allOf": [\n {\n "$ref": "#/definitions/AllowDenyPattern"\n }\n ]\n },\n "table_pattern": {\n "title": "Table Pattern",\n "description": "Regex patterns to filter keyspaces.tables for ingestion.",\n "default": {\n "allow": [\n ".*"\n ],\n "deny": [],\n "ignoreCase": true\n },\n "allOf": [\n {\n "$ref": "#/definitions/AllowDenyPattern"\n }\n ]\n },\n "profile_pattern": {\n "title": "Profile Pattern",\n "description": "Regex patterns for tables to profile",\n "default": {\n "allow": [\n ".*"\n ],\n "deny": [],\n "ignoreCase": true\n },\n "allOf": [\n {\n "$ref": "#/definitions/AllowDenyPattern"\n }\n ]\n },\n "profiling": {\n "title": "Profiling",\n "description": "Configuration for profiling",\n "default": {\n "enabled": false,\n "operation_config": {\n "lower_freq_profile_enabled": false,\n "profile_day_of_week": null,\n "profile_date_of_month": null\n },\n "limit": null,\n "offset": null,\n "profile_table_level_only": false,\n "include_field_null_count": true,\n "include_field_distinct_count": true,\n "include_field_min_value": true,\n "include_field_max_value": true,\n "include_field_mean_value": true,\n "include_field_median_value": true,\n "include_field_stddev_value": true,\n "include_field_quantiles": false,\n "include_field_distinct_value_frequencies": false,\n "include_field_histogram": false,\n "include_field_sample_values": true,\n "max_workers": 20\n },\n "allOf": [\n {\n "$ref": "#/definitions/GEProfilingBaseConfig"\n }\n ]\n }\n },\n "additionalProperties": false,\n "definitions": {\n "DynamicTypedStateProviderConfig": {\n "title": "DynamicTypedStateProviderConfig",\n "type": "object",\n "properties": {\n "type": {\n "title": "Type",\n "description": "The type of the state provider to use. For DataHub use `datahub`",\n "type": "string"\n },\n "config": {\n "title": "Config",\n "description": "The configuration required for initializing the state provider. Default: The datahub_api config if set at pipeline level. Otherwise, the default DatahubClientConfig. See the defaults (https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/graph/client.py#L19).",\n "default": {},\n "type": "object"\n }\n },\n "required": [\n "type"\n ],\n "additionalProperties": false\n },\n "StatefulStaleMetadataRemovalConfig": {\n "title": "StatefulStaleMetadataRemovalConfig",\n "description": "Base specialized config for Stateful Ingestion with stale metadata removal capability.",\n "type": "object",\n "properties": {\n "enabled": {\n "title": "Enabled",\n "description": "Whether or not to enable stateful ingest. Default: True if a pipeline_name is set and either a datahub-rest sink or `datahub_api` is specified, otherwise False",\n "default": false,\n "type": "boolean"\n },\n "remove_stale_metadata": {\n "title": "Remove Stale Metadata",\n "description": "Soft-deletes the entities present in the last successful run but missing in the current run with stateful_ingestion enabled.",\n "default": true,\n "type": "boolean"\n },\n "fail_safe_threshold": {\n "title": "Fail Safe Threshold",\n "description": "Prevents large amount of soft deletes & the state from committing from accidental changes to the source configuration if the relative change percent in entities compared to the previous state is above the \'fail_safe_threshold\'.",\n "default": 75.0,\n "minimum": 0.0,\n "maximum": 100.0,\n "type": "number"\n }\n },\n "additionalProperties": false\n },\n "CassandraCloudConfig": {\n "title": "CassandraCloudConfig",\n "description": "Configuration for connecting to DataStax Astra DB in the cloud.",\n "type": "object",\n "properties": {\n "token": {\n "title": "Token",\n "description": "The Astra DB application token used for authentication.",\n "type": "string"\n },\n "secure_connect_bundle": {\n "title": "Secure Connect Bundle",\n "description": "File path to the Secure Connect Bundle (.zip) used for a secure connection to DataStax Astra DB.",\n "type": "string"\n },\n "connect_timeout": {\n "title": "Connect Timeout",\n "description": "Timeout in seconds for establishing new connections to Cassandra.",\n "default": 600,\n "type": "integer"\n },\n "request_timeout": {\n "title": "Request Timeout",\n "description": "Timeout in seconds for individual Cassandra requests.",\n "default": 600,\n "type": "integer"\n }\n },\n "required": [\n "token",\n "secure_connect_bundle"\n ],\n "additionalProperties": false\n },\n "AllowDenyPattern": {\n "title": "AllowDenyPattern",\n "description": "A class to store allow deny regexes",\n "type": "object",\n "properties": {\n "allow": {\n "title": "Allow",\n "description": "List of regex patterns to include in ingestion",\n "default": [\n ".*"\n ],\n "type": "array",\n "items": {\n "type": "string"\n }\n },\n "deny": {\n "title": "Deny",\n "description": "List of regex patterns to exclude from ingestion.",\n "default": [],\n "type": "array",\n "items": {\n "type": "string"\n }\n },\n "ignoreCase": {\n "title": "Ignorecase",\n "description": "Whether to ignore case sensitivity during pattern matching.",\n "default": true,\n "type": "boolean"\n }\n },\n "additionalProperties": false\n },\n "OperationConfig": {\n "title": "OperationConfig",\n "type": "object",\n "properties": {\n "lower_freq_profile_enabled": {\n "title": "Lower Freq Profile Enabled",\n "description": "Whether to do profiling at lower freq or not. This does not do any scheduling just adds additional checks to when not to run profiling.",\n "default": false,\n "type": "boolean"\n },\n "profile_day_of_week": {\n "title": "Profile Day Of Week",\n "description": "Number between 0 to 6 for day of week (both inclusive). 0 is Monday and 6 is Sunday. If not specified, defaults to Nothing and this field does not take affect.",\n "type": "integer"\n },\n "profile_date_of_month": {\n "title": "Profile Date Of Month",\n "description": "Number between 1 to 31 for date of month (both inclusive). If not specified, defaults to Nothing and this field does not take affect.",\n "type": "integer"\n }\n },\n "additionalProperties": false\n },\n "GEProfilingBaseConfig": {\n "title": "GEProfilingBaseConfig",\n "type": "object",\n "properties": {\n "enabled": {\n "title": "Enabled",\n "description": "Whether profiling should be done.",\n "default": false,\n "type": "boolean"\n },\n "operation_config": {\n "title": "Operation Config",\n "description": "Experimental feature. To specify operation configs.",\n "allOf": [\n {\n "$ref": "#/definitions/OperationConfig"\n }\n ]\n },\n "limit": {\n "title": "Limit",\n "description": "Max number of documents to profile. By default, profiles all documents.",\n "type": "integer"\n },\n "offset": {\n "title": "Offset",\n "description": "Offset in documents to profile. By default, uses no offset.",\n "type": "integer"\n },\n "profile_table_level_only": {\n "title": "Profile Table Level Only",\n "description": "Whether to perform profiling at table-level only, or include column-level profiling as well.",\n "default": false,\n "type": "boolean"\n },\n "include_field_null_count": {\n "title": "Include Field Null Count",\n "description": "Whether to profile for the number of nulls for each column.",\n "default": true,\n "type": "boolean"\n },\n "include_field_distinct_count": {\n "title": "Include Field Distinct Count",\n "description": "Whether to profile for the number of distinct values for each column.",\n "default": true,\n "type": "boolean"\n },\n "include_field_min_value": {\n "title": "Include Field Min Value",\n "description": "Whether to profile for the min value of numeric columns.",\n "default": true,\n "type": "boolean"\n },\n "include_field_max_value": {\n "title": "Include Field Max Value",\n "description": "Whether to profile for the max value of numeric columns.",\n "default": true,\n "type": "boolean"\n },\n "include_field_mean_value": {\n "title": "Include Field Mean Value",\n "description": "Whether to profile for the mean value of numeric columns.",\n "default": true,\n "type": "boolean"\n },\n "include_field_median_value": {\n "title": "Include Field Median Value",\n "description": "Whether to profile for the median value of numeric columns.",\n "default": true,\n "type": "boolean"\n },\n "include_field_stddev_value": {\n "title": "Include Field Stddev Value",\n "description": "Whether to profile for the standard deviation of numeric columns.",\n "default": true,\n "type": "boolean"\n },\n "include_field_quantiles": {\n "title": "Include Field Quantiles",\n "description": "Whether to profile for the quantiles of numeric columns.",\n "default": false,\n "type": "boolean"\n },\n "include_field_distinct_value_frequencies": {\n "title": "Include Field Distinct Value Frequencies",\n "description": "Whether to profile for distinct value frequencies.",\n "default": false,\n "type": "boolean"\n },\n "include_field_histogram": {\n "title": "Include Field Histogram",\n "description": "Whether to profile for the histogram for numeric fields.",\n "default": false,\n "type": "boolean"\n },\n "include_field_sample_values": {\n "title": "Include Field Sample Values",\n "description": "Whether to profile for the sample values for all columns.",\n "default": true,\n "type": "boolean"\n },\n "max_workers": {\n "title": "Max Workers",\n "description": "Number of worker threads to use for profiling. Set to 1 to disable.",\n "default": 20,\n "type": "integer"\n }\n },\n "additionalProperties": false\n }\n }\n}\n')))),(0,n.yg)("h3",{id:"code-coordinates"},"Code Coordinates"),(0,n.yg)("ul",null,(0,n.yg)("li",{parentName:"ul"},"Class Name: ",(0,n.yg)("inlineCode",{parentName:"li"},"datahub.ingestion.source.cassandra.cassandra.CassandraSource")),(0,n.yg)("li",{parentName:"ul"},"Browse on ",(0,n.yg)("a",{parentName:"li",href:"https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra.py"},"GitHub"))),(0,n.yg)("h2",null,"Questions"),(0,n.yg)("p",null,"If you've got any questions on configuring ingestion for Cassandra, feel free to ping us on ",(0,n.yg)("a",{parentName:"p",href:"https://datahub.com/slack"},"our Slack"),"."))}u.isMDXComponent=!0}}]); |