// Mirror of https://github.com/datahub-project/datahub.git
// (synced 2025-08-26 01:56:06 +00:00).
// File as mirrored: 1 line, 9.8 KiB, JavaScript.
"use strict";(self.webpackChunkdocs_website=self.webpackChunkdocs_website||[]).push([[67121],{15680:(e,t,a)=>{a.d(t,{xA:()=>c,yg:()=>g});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function i(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function o(e){for(var t=1;t<arguments.length;t++){var a=null!=arguments[t]?arguments[t]:{};t%2?i(Object(a),!0).forEach((function(t){r(e,t,a[t])})):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(a)):i(Object(a)).forEach((function(t){Object.defineProperty(e,t,Object.getOwnPropertyDescriptor(a,t))}))}return e}function l(e,t){if(null==e)return{};var a,n,r=function(e,t){if(null==e)return{};var a,n,r={},i=Object.keys(e);for(n=0;n<i.length;n++)a=i[n],t.indexOf(a)>=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n<i.length;n++)a=i[n],t.indexOf(a)>=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var u=n.createContext({}),s=function(e){var t=n.useContext(u),a=t;return e&&(a="function"==typeof e?e(t):o(o({},t),e)),a},c=function(e){var t=s(e.components);return n.createElement(u.Provider,{value:t},e.children)},p="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},d=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,i=e.originalType,u=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),p=s(a),d=r,g=p["".concat(u,".").concat(d)]||p[d]||m[d]||i;return a?n.createElement(g,o(o({ref:t},c),{},{components:a})):n.createElement(g,o({ref:t},c))}));function g(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=a.length,o=new Array(i);o[0]=d;var l={};for(var u in 
t)hasOwnProperty.call(t,u)&&(l[u]=t[u]);l.originalType=e,l[p]="string"==typeof e?e:r,o[1]=l;for(var s=2;s<i;s++)o[s]=a[s];return n.createElement.apply(null,o)}return n.createElement.apply(null,a)}d.displayName="MDXCreateElement"},72597:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>c,contentTitle:()=>u,default:()=>g,frontMatter:()=>l,metadata:()=>s,toc:()=>p});a(96540);var n=a(15680);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function i(e,t){return t=null!=t?t:{},Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(t)):function(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}(Object(t)).forEach((function(a){Object.defineProperty(e,a,Object.getOwnPropertyDescriptor(t,a))})),e}function o(e,t){if(null==e)return{};var a,n,r=function(e,t){if(null==e)return{};var a,n,r={},i=Object.keys(e);for(n=0;n<i.length;n++)a=i[n],t.indexOf(a)>=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n<i.length;n++)a=i[n],t.indexOf(a)>=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}const l={title:"SchemaTron (Incubating)",slug:"/metadata-integration/java/datahub-schematron",custom_edit_url:"https://github.com/datahub-project/datahub/blob/master/metadata-integration/java/datahub-schematron/README.md"},u="SchemaTron (Incubating)",s={unversionedId:"metadata-integration/java/datahub-schematron/README",id:"version-1.1.0/metadata-integration/java/datahub-schematron/README",title:"SchemaTron (Incubating)",description:"\u26a0\ufe0f This is an incubating project in draft status. 
APIs and functionality may change significantly between releases.",source:"@site/versioned_docs/version-1.1.0/metadata-integration/java/datahub-schematron/README.md",sourceDirName:"metadata-integration/java/datahub-schematron",slug:"/metadata-integration/java/datahub-schematron",permalink:"/docs/1.1.0/metadata-integration/java/datahub-schematron",draft:!1,editUrl:"https://github.com/datahub-project/datahub/blob/master/metadata-integration/java/datahub-schematron/README.md",tags:[],version:"1.1.0",frontMatter:{title:"SchemaTron (Incubating)",slug:"/metadata-integration/java/datahub-schematron",custom_edit_url:"https://github.com/datahub-project/datahub/blob/master/metadata-integration/java/datahub-schematron/README.md"}},c={},p=[{value:"Modules",id:"modules",level:2},{value:"CLI Module",id:"cli-module",level:3},{value:"CLI Options",id:"cli-options",level:4},{value:"Library Module",id:"library-module",level:3},{value:"Example Schema Support",id:"example-schema-support",level:2},{value:"Development",id:"development",level:2},{value:"Contributing",id:"contributing",level:2}],m={toc:p},d="wrapper";function g(e){var{components:t}=e,a=o(e,["components"]);return(0,n.yg)(d,i(function(e){for(var t=1;t<arguments.length;t++){var a=null!=arguments[t]?arguments[t]:{},n=Object.keys(a);"function"==typeof Object.getOwnPropertySymbols&&(n=n.concat(Object.getOwnPropertySymbols(a).filter((function(e){return Object.getOwnPropertyDescriptor(a,e).enumerable})))),n.forEach((function(t){r(e,t,a[t])}))}return e}({},m,a),{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h1",{id:"schematron-incubating"},"SchemaTron (Incubating)"),(0,n.yg)("blockquote",null,(0,n.yg)("p",{parentName:"blockquote"},"\u26a0\ufe0f This is an incubating project in draft status. APIs and functionality may change significantly between releases.")),(0,n.yg)("p",null,"SchemaTron is a schema translation toolkit that converts between various schema formats and DataHub's native schema representation. 
It currently provides robust support for Apache Avro schema translation with a focus on complex schema structures including unions, arrays, maps, and nested records."),(0,n.yg)("h2",{id:"modules"},"Modules"),(0,n.yg)("h3",{id:"cli-module"},"CLI Module"),(0,n.yg)("p",null,"Command-line interface for converting schemas and emitting them to DataHub."),(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-bash"},'# Execute from this directory\n../../../gradlew :metadata-integration:java:datahub-schematron:cli:run --args="-i cli/src/test/resources/FlatUser.avsc"\n')),(0,n.yg)("h4",{id:"cli-options"},"CLI Options"),(0,n.yg)("ul",null,(0,n.yg)("li",{parentName:"ul"},(0,n.yg)("inlineCode",{parentName:"li"},"-i, --input"),": Input schema file or directory path"),(0,n.yg)("li",{parentName:"ul"},(0,n.yg)("inlineCode",{parentName:"li"},"-p, --platform"),': Data platform name (default: "avro")'),(0,n.yg)("li",{parentName:"ul"},(0,n.yg)("inlineCode",{parentName:"li"},"-s, --server"),': DataHub server URL (default: "http://localhost:8080")'),(0,n.yg)("li",{parentName:"ul"},(0,n.yg)("inlineCode",{parentName:"li"},"-t, --token"),": DataHub access token"),(0,n.yg)("li",{parentName:"ul"},(0,n.yg)("inlineCode",{parentName:"li"},"--sink"),': Output sink - "rest" or "file" (default: "rest")'),(0,n.yg)("li",{parentName:"ul"},(0,n.yg)("inlineCode",{parentName:"li"},"--output-file"),': Output file path when using file sink (default: "metadata.json")')),(0,n.yg)("h3",{id:"library-module"},"Library Module"),(0,n.yg)("p",null,"Core translation logic and models for schema conversion. 
Features include:"),(0,n.yg)("ul",null,(0,n.yg)("li",{parentName:"ul"},(0,n.yg)("p",{parentName:"li"},"Support for complex Avro schema structures:"),(0,n.yg)("ul",{parentName:"li"},(0,n.yg)("li",{parentName:"ul"},"Union types with multiple record options"),(0,n.yg)("li",{parentName:"ul"},"Nested records and arrays"),(0,n.yg)("li",{parentName:"ul"},"Optional fields with defaults"),(0,n.yg)("li",{parentName:"ul"},"Logical types (date, timestamp, etc.)"),(0,n.yg)("li",{parentName:"ul"},"Maps with various value types"),(0,n.yg)("li",{parentName:"ul"},"Enum types"),(0,n.yg)("li",{parentName:"ul"},"Custom metadata and documentation"))),(0,n.yg)("li",{parentName:"ul"},(0,n.yg)("p",{parentName:"li"},"Comprehensive path handling for schema fields")),(0,n.yg)("li",{parentName:"ul"},(0,n.yg)("p",{parentName:"li"},"DataHub-compatible metadata generation")),(0,n.yg)("li",{parentName:"ul"},(0,n.yg)("p",{parentName:"li"},"Schema fingerprinting and versioning"))),(0,n.yg)("h2",{id:"example-schema-support"},"Example Schema Support"),(0,n.yg)("p",null,"The library can handle sophisticated schema structures including:"),(0,n.yg)("ul",null,(0,n.yg)("li",{parentName:"ul"},"Customer profiles with multiple identification types (passport, driver's license, national ID)"),(0,n.yg)("li",{parentName:"ul"},"Contact information with primary and alternative contact methods"),(0,n.yg)("li",{parentName:"ul"},"Address validation with verification metadata"),(0,n.yg)("li",{parentName:"ul"},"Subscription history tracking"),(0,n.yg)("li",{parentName:"ul"},"Flexible preference and metadata storage"),(0,n.yg)("li",{parentName:"ul"},"Tagged customer attributes")),(0,n.yg)("h2",{id:"development"},"Development"),(0,n.yg)("p",null,"The project includes extensive test coverage through:"),(0,n.yg)("ul",null,(0,n.yg)("li",{parentName:"ul"},"Unit tests for field path handling"),(0,n.yg)("li",{parentName:"ul"},"Schema translation comparison tests"),(0,n.yg)("li",{parentName:"ul"},"Integration tests with Python 
reference implementation")),(0,n.yg)("p",null,"Test resources include example schemas demonstrating various Avro schema features and edge cases."),(0,n.yg)("h2",{id:"contributing"},"Contributing"),(0,n.yg)("p",null,"As this is an incubating project, we welcome contributions and feedback on:"),(0,n.yg)("ul",null,(0,n.yg)("li",{parentName:"ul"},"Additional schema format support"),(0,n.yg)("li",{parentName:"ul"},"Improved handling of complex schema patterns"),(0,n.yg)("li",{parentName:"ul"},"Enhanced metadata translation"),(0,n.yg)("li",{parentName:"ul"},"Documentation and examples"),(0,n.yg)("li",{parentName:"ul"},"Test coverage")))}g.isMDXComponent=!0}}]); |