mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-24 16:38:19 +00:00
ci: add markdown-link-check (#8771)
This commit is contained in:
parent
493d31531a
commit
31abf383d1
@ -6,7 +6,7 @@ export const Logo = (props) => {
|
||||
<div style={{ display: "flex", justifyContent: "center", padding: "20px", height: "190px" }}>
|
||||
<img
|
||||
alt="DataHub Logo"
|
||||
src={useBaseUrl("/img/datahub-logo-color-mark.svg")}
|
||||
src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/datahub-logo-color-mark.svg"
|
||||
{...props}
|
||||
/>
|
||||
</div>
|
||||
@ -18,7 +18,7 @@ export const Logo = (props) => {
|
||||
<!--
|
||||
HOSTED_DOCS_ONLY-->
|
||||
<p align="center">
|
||||
<img alt="DataHub" src="docs/imgs/datahub-logo-color-mark.svg" height="150" />
|
||||
<img alt="DataHub" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/datahub-logo-color-mark.svg" height="150" />
|
||||
</p>
|
||||
<!-- -->
|
||||
|
||||
@ -156,7 +156,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to
|
||||
|
||||
- [DataHub Blog](https://blog.datahubproject.io/)
|
||||
- [DataHub YouTube Channel](https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w)
|
||||
- [Optum: Data Mesh via DataHub](https://optum.github.io/blog/2022/03/23/data-mesh-via-datahub/)
|
||||
- [Optum: Data Mesh via DataHub](https://opensource.optum.com/blog/2022/03/23/data-mesh-via-datahub)
|
||||
- [Saxo Bank: Enabling Data Discovery in Data Mesh](https://medium.com/datahub-project/enabling-data-discovery-in-a-data-mesh-the-saxo-journey-451b06969c8f)
|
||||
- [Bringing The Power Of The DataHub Real-Time Metadata Graph To Everyone At Acryl Data](https://www.dataengineeringpodcast.com/acryl-data-datahub-metadata-graph-episode-230/)
|
||||
- [DataHub: Popular Metadata Architectures Explained](https://engineering.linkedin.com/blog/2020/datahub-popular-metadata-architectures-explained)
|
||||
|
||||
@ -89,7 +89,7 @@ task fastReload(type: YarnTask) {
|
||||
args = ['run', 'generate-rsync']
|
||||
}
|
||||
|
||||
task yarnLint(type: YarnTask, dependsOn: [yarnInstall]) {
|
||||
task yarnLint(type: YarnTask, dependsOn: [yarnInstall, yarnGenerate]) {
|
||||
inputs.files(projectMdFiles)
|
||||
args = ['run', 'lint-check']
|
||||
outputs.dir("dist")
|
||||
|
||||
50
docs-website/markdown-link-check-config.json
Normal file
50
docs-website/markdown-link-check-config.json
Normal file
@ -0,0 +1,50 @@
|
||||
{
|
||||
"ignorePatterns": [
|
||||
{
|
||||
"pattern": "^http://demo\\.datahubproject\\.io"
|
||||
},
|
||||
{
|
||||
"pattern": "^http://localhost"
|
||||
},
|
||||
{
|
||||
"pattern": "^http://www.famfamfam.com"
|
||||
},
|
||||
{
|
||||
"pattern": "^http://www.linkedin.com"
|
||||
},
|
||||
{
|
||||
"pattern": "\\.md$"
|
||||
},
|
||||
{
|
||||
"pattern":"\\.json$"
|
||||
},
|
||||
{
|
||||
"pattern":"\\.txt$"
|
||||
},
|
||||
{
|
||||
"pattern": "\\.java$"
|
||||
},
|
||||
{
|
||||
"pattern": "\\.md#.*$"
|
||||
},
|
||||
{
|
||||
"pattern": "^https://oauth2.googleapis.com/token"
|
||||
},
|
||||
{
|
||||
"pattern": "^https://login.microsoftonline.com/common/oauth2/na$"
|
||||
},
|
||||
{
|
||||
"pattern": "#v(\\d+)-(\\d+)-(\\d+)"
|
||||
},
|
||||
{
|
||||
"pattern": "^https://github.com/mohdsiddique$"
|
||||
},
|
||||
{
|
||||
"pattern": "^https://github.com/2x$"
|
||||
},
|
||||
{
|
||||
"pattern": "^https://github.com/datahub-project/datahub/assets/15873986/2f47d033-6c2b-483a-951d-e6d6b807f0d0%22%3E$"
|
||||
}
|
||||
],
|
||||
"aliveStatusCodes": [200, 206, 0, 999, 400, 401, 403]
|
||||
}
|
||||
@ -17,7 +17,7 @@
|
||||
"generate": "rm -rf genDocs genStatic && mkdir genDocs genStatic && yarn _generate-docs && mv docs/* genDocs/ && rmdir docs",
|
||||
"generate-rsync": "mkdir -p genDocs genStatic && yarn _generate-docs && rsync -v --checksum -r -h -i --delete docs/ genDocs && rm -rf docs",
|
||||
"lint": "prettier -w generateDocsDir.ts sidebars.js src/pages/index.js",
|
||||
"lint-check": "prettier -l generateDocsDir.ts sidebars.js src/pages/index.js",
|
||||
"lint-check": "prettier -l generateDocsDir.ts sidebars.js src/pages/index.js && find ./genDocs -name \\*.md -not -path \"./genDocs/python-sdk/models.md\" -print0 | xargs -0 -n1 markdown-link-check -p -q -c markdown-link-check-config.json",
|
||||
"lint-fix": "prettier --write generateDocsDir.ts sidebars.js src/pages/index.js"
|
||||
},
|
||||
"dependencies": {
|
||||
@ -37,6 +37,7 @@
|
||||
"docusaurus-graphql-plugin": "0.5.0",
|
||||
"docusaurus-plugin-sass": "^0.2.1",
|
||||
"dotenv": "^16.0.1",
|
||||
"markdown-link-check": "^3.11.2",
|
||||
"markprompt": "^0.1.7",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "18.2.0",
|
||||
|
||||
@ -3414,6 +3414,11 @@ async-validator@^4.1.0:
|
||||
resolved "https://registry.yarnpkg.com/async-validator/-/async-validator-4.2.5.tgz#c96ea3332a521699d0afaaceed510a54656c6339"
|
||||
integrity sha512-7HhHjtERjqlNbZtqNqy2rckN/SpOOlmDliet+lP7k+eKZEjPk3DgyeU9lIXLdeLz0uBbbVp+9Qdow9wJWgwwfg==
|
||||
|
||||
async@^3.2.4:
|
||||
version "3.2.4"
|
||||
resolved "https://registry.yarnpkg.com/async/-/async-3.2.4.tgz#2d22e00f8cddeb5fde5dd33522b56d1cf569a81c"
|
||||
integrity sha512-iAB+JbDEGXhyIUavoDl9WP/Jj106Kz9DEn1DPgYw5ruDn0e3Wgi3sKFm55sASdGBNOQB8F59d9qQ7deqrHA8wQ==
|
||||
|
||||
asynckit@^0.4.0:
|
||||
version "0.4.0"
|
||||
resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79"
|
||||
@ -3765,6 +3770,11 @@ chalk@^4.0.0, chalk@^4.1.0, chalk@^4.1.2:
|
||||
ansi-styles "^4.1.0"
|
||||
supports-color "^7.1.0"
|
||||
|
||||
chalk@^5.2.0:
|
||||
version "5.3.0"
|
||||
resolved "https://registry.yarnpkg.com/chalk/-/chalk-5.3.0.tgz#67c20a7ebef70e7f3970a01f90fa210cb6860385"
|
||||
integrity sha512-dLitG79d+GV1Nb/VYcCDFivJeK1hiukt9QjRNVOsUtTy1rR1YJsmpGGTZ3qJos+uw7WmWF4wUwBd9jxjocFC2w==
|
||||
|
||||
character-entities-legacy@^1.0.0:
|
||||
version "1.1.4"
|
||||
resolved "https://registry.yarnpkg.com/character-entities-legacy/-/character-entities-legacy-1.1.4.tgz#94bc1845dce70a5bb9d2ecc748725661293d8fc1"
|
||||
@ -3797,7 +3807,7 @@ cheerio-select@^2.1.0:
|
||||
domhandler "^5.0.3"
|
||||
domutils "^3.0.1"
|
||||
|
||||
cheerio@^1.0.0-rc.12:
|
||||
cheerio@^1.0.0-rc.10, cheerio@^1.0.0-rc.12:
|
||||
version "1.0.0-rc.12"
|
||||
resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.12.tgz#788bf7466506b1c6bf5fae51d24a2c4d62e47683"
|
||||
integrity sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==
|
||||
@ -3984,6 +3994,11 @@ comma-separated-tokens@^2.0.0:
|
||||
resolved "https://registry.yarnpkg.com/comma-separated-tokens/-/comma-separated-tokens-2.0.3.tgz#4e89c9458acb61bc8fef19f4529973b2392839ee"
|
||||
integrity sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg==
|
||||
|
||||
commander@^10.0.1:
|
||||
version "10.0.1"
|
||||
resolved "https://registry.yarnpkg.com/commander/-/commander-10.0.1.tgz#881ee46b4f77d1c1dccc5823433aa39b022cbe06"
|
||||
integrity sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==
|
||||
|
||||
commander@^2.20.0:
|
||||
version "2.20.3"
|
||||
resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33"
|
||||
@ -4385,6 +4400,13 @@ debug@4, debug@^4.0.0, debug@^4.1.0, debug@^4.1.1:
|
||||
dependencies:
|
||||
ms "2.1.2"
|
||||
|
||||
debug@^3.2.6:
|
||||
version "3.2.7"
|
||||
resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.7.tgz#72580b7e9145fb39b6676f9c5e5fb100b934179a"
|
||||
integrity sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==
|
||||
dependencies:
|
||||
ms "^2.1.1"
|
||||
|
||||
decode-named-character-reference@^1.0.0:
|
||||
version "1.0.2"
|
||||
resolved "https://registry.yarnpkg.com/decode-named-character-reference/-/decode-named-character-reference-1.0.2.tgz#daabac9690874c394c81e4162a0304b35d824f0e"
|
||||
@ -5551,6 +5573,13 @@ html-entities@^2.3.2:
|
||||
resolved "https://registry.yarnpkg.com/html-entities/-/html-entities-2.4.0.tgz#edd0cee70402584c8c76cc2c0556db09d1f45061"
|
||||
integrity sha512-igBTJcNNNhvZFRtm8uA6xMY6xYleeDwn3PeBCkDz7tHttv4F2hsDI2aPgNERWzvRcNYHNT3ymRaQzllmXj4YsQ==
|
||||
|
||||
html-link-extractor@^1.0.5:
|
||||
version "1.0.5"
|
||||
resolved "https://registry.yarnpkg.com/html-link-extractor/-/html-link-extractor-1.0.5.tgz#a4be345cb13b8c3352d82b28c8b124bb7bf5dd6f"
|
||||
integrity sha512-ADd49pudM157uWHwHQPUSX4ssMsvR/yHIswOR5CUfBdK9g9ZYGMhVSE6KZVHJ6kCkR0gH4htsfzU6zECDNVwyw==
|
||||
dependencies:
|
||||
cheerio "^1.0.0-rc.10"
|
||||
|
||||
html-minifier-terser@^6.0.2, html-minifier-terser@^6.1.0:
|
||||
version "6.1.0"
|
||||
resolved "https://registry.yarnpkg.com/html-minifier-terser/-/html-minifier-terser-6.1.0.tgz#bfc818934cc07918f6b3669f5774ecdfd48f32ab"
|
||||
@ -5673,6 +5702,13 @@ iconv-lite@0.4.24:
|
||||
dependencies:
|
||||
safer-buffer ">= 2.1.2 < 3"
|
||||
|
||||
iconv-lite@^0.6.3:
|
||||
version "0.6.3"
|
||||
resolved "https://registry.yarnpkg.com/iconv-lite/-/iconv-lite-0.6.3.tgz#a52f80bf38da1952eb5c681790719871a1a72501"
|
||||
integrity sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==
|
||||
dependencies:
|
||||
safer-buffer ">= 2.1.2 < 3.0.0"
|
||||
|
||||
icss-utils@^5.0.0, icss-utils@^5.1.0:
|
||||
version "5.1.0"
|
||||
resolved "https://registry.yarnpkg.com/icss-utils/-/icss-utils-5.1.0.tgz#c6be6858abd013d768e98366ae47e25d5887b1ae"
|
||||
@ -5795,6 +5831,11 @@ ipaddr.js@^2.0.1:
|
||||
resolved "https://registry.yarnpkg.com/ipaddr.js/-/ipaddr.js-2.1.0.tgz#2119bc447ff8c257753b196fc5f1ce08a4cdf39f"
|
||||
integrity sha512-LlbxQ7xKzfBusov6UMi4MFpEg0m+mAm9xyNGEduwXMEDuf4WfzB/RZwMVYEd7IKGvh4IUkEXYxtAVu9T3OelJQ==
|
||||
|
||||
is-absolute-url@^4.0.1:
|
||||
version "4.0.1"
|
||||
resolved "https://registry.yarnpkg.com/is-absolute-url/-/is-absolute-url-4.0.1.tgz#16e4d487d4fded05cfe0685e53ec86804a5e94dc"
|
||||
integrity sha512-/51/TKE88Lmm7Gc4/8btclNXWS+g50wXhYJq8HWIBAGUBnoAdRu1aXeh364t/O7wXDAcTJDP8PNuNKWUDWie+A==
|
||||
|
||||
is-alphabetical@1.0.4, is-alphabetical@^1.0.0:
|
||||
version "1.0.4"
|
||||
resolved "https://registry.yarnpkg.com/is-alphabetical/-/is-alphabetical-1.0.4.tgz#9e7d6b94916be22153745d184c298cbf986a686d"
|
||||
@ -5963,6 +6004,13 @@ is-regexp@^1.0.0:
|
||||
resolved "https://registry.yarnpkg.com/is-regexp/-/is-regexp-1.0.0.tgz#fd2d883545c46bac5a633e7b9a09e87fa2cb5069"
|
||||
integrity sha512-7zjFAPO4/gwyQAAgRRmqeEeyIICSdmCqa3tsVHMdBzaXXRiqopZL4Cyghg/XulGWrtABTpbnYYzzIRffLkP4oA==
|
||||
|
||||
is-relative-url@^4.0.0:
|
||||
version "4.0.0"
|
||||
resolved "https://registry.yarnpkg.com/is-relative-url/-/is-relative-url-4.0.0.tgz#4d8371999ff6033b76e4d9972fb5bf496fddfa97"
|
||||
integrity sha512-PkzoL1qKAYXNFct5IKdKRH/iBQou/oCC85QhXj6WKtUQBliZ4Yfd3Zk27RHu9KQG8r6zgvAA2AQKC9p+rqTszg==
|
||||
dependencies:
|
||||
is-absolute-url "^4.0.1"
|
||||
|
||||
is-root@^2.1.0:
|
||||
version "2.1.0"
|
||||
resolved "https://registry.yarnpkg.com/is-root/-/is-root-2.1.0.tgz#809e18129cf1129644302a4f8544035d51984a9c"
|
||||
@ -6010,6 +6058,13 @@ isarray@~1.0.0:
|
||||
resolved "https://registry.yarnpkg.com/isarray/-/isarray-1.0.0.tgz#bb935d48582cba168c06834957a54a3e07124f11"
|
||||
integrity sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==
|
||||
|
||||
isemail@^3.2.0:
|
||||
version "3.2.0"
|
||||
resolved "https://registry.yarnpkg.com/isemail/-/isemail-3.2.0.tgz#59310a021931a9fb06bbb51e155ce0b3f236832c"
|
||||
integrity sha512-zKqkK+O+dGqevc93KNsbZ/TqTUFd46MwWjYOoMrjIMZ51eU7DtQG3Wmd9SQQT7i7RVnuTPEiYEWHU3MSbxC1Tg==
|
||||
dependencies:
|
||||
punycode "2.x.x"
|
||||
|
||||
isexe@^2.0.0:
|
||||
version "2.0.0"
|
||||
resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10"
|
||||
@ -6205,6 +6260,16 @@ lines-and-columns@^1.1.6:
|
||||
resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.2.4.tgz#eca284f75d2965079309dc0ad9255abb2ebc1632"
|
||||
integrity sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==
|
||||
|
||||
link-check@^5.2.0:
|
||||
version "5.2.0"
|
||||
resolved "https://registry.yarnpkg.com/link-check/-/link-check-5.2.0.tgz#595a339d305900bed8c1302f4342a29c366bf478"
|
||||
integrity sha512-xRbhYLaGDw7eRDTibTAcl6fXtmUQ13vkezQiTqshHHdGueQeumgxxmQMIOmJYsh2p8BF08t8thhDQ++EAOOq3w==
|
||||
dependencies:
|
||||
is-relative-url "^4.0.0"
|
||||
isemail "^3.2.0"
|
||||
ms "^2.1.3"
|
||||
needle "^3.1.0"
|
||||
|
||||
loader-runner@^4.2.0:
|
||||
version "4.3.0"
|
||||
resolved "https://registry.yarnpkg.com/loader-runner/-/loader-runner-4.3.0.tgz#c1b4a163b99f614830353b16755e7149ac2314e1"
|
||||
@ -6366,6 +6431,28 @@ markdown-escapes@^1.0.0:
|
||||
resolved "https://registry.yarnpkg.com/markdown-escapes/-/markdown-escapes-1.0.4.tgz#c95415ef451499d7602b91095f3c8e8975f78535"
|
||||
integrity sha512-8z4efJYk43E0upd0NbVXwgSTQs6cT3T06etieCMEg7dRbzCbxUCK/GHlX8mhHRDcp+OLlHkPKsvqQTCvsRl2cg==
|
||||
|
||||
markdown-link-check@^3.11.2:
|
||||
version "3.11.2"
|
||||
resolved "https://registry.yarnpkg.com/markdown-link-check/-/markdown-link-check-3.11.2.tgz#303a8a03d4a34c42ef3158e0b245bced26b5d904"
|
||||
integrity sha512-zave+vI4AMeLp0FlUllAwGbNytSKsS3R2Zgtf3ufVT892Z/L6Ro9osZwE9PNA7s0IkJ4onnuHqatpsaCiAShJw==
|
||||
dependencies:
|
||||
async "^3.2.4"
|
||||
chalk "^5.2.0"
|
||||
commander "^10.0.1"
|
||||
link-check "^5.2.0"
|
||||
lodash "^4.17.21"
|
||||
markdown-link-extractor "^3.1.0"
|
||||
needle "^3.2.0"
|
||||
progress "^2.0.3"
|
||||
|
||||
markdown-link-extractor@^3.1.0:
|
||||
version "3.1.0"
|
||||
resolved "https://registry.yarnpkg.com/markdown-link-extractor/-/markdown-link-extractor-3.1.0.tgz#0d5a703630d791a9e2017449e1a9b294f2d2b676"
|
||||
integrity sha512-r0NEbP1dsM+IqB62Ru9TXLP/HDaTdBNIeylYXumuBi6Xv4ufjE1/g3TnslYL8VNqNcGAGbMptQFHrrdfoZ/Sug==
|
||||
dependencies:
|
||||
html-link-extractor "^1.0.5"
|
||||
marked "^4.1.0"
|
||||
|
||||
markdown-table@^3.0.0:
|
||||
version "3.0.3"
|
||||
resolved "https://registry.yarnpkg.com/markdown-table/-/markdown-table-3.0.3.tgz#e6331d30e493127e031dd385488b5bd326e4a6bd"
|
||||
@ -6376,6 +6463,11 @@ marked@^2.0.3:
|
||||
resolved "https://registry.yarnpkg.com/marked/-/marked-2.1.3.tgz#bd017cef6431724fd4b27e0657f5ceb14bff3753"
|
||||
integrity sha512-/Q+7MGzaETqifOMWYEA7HVMaZb4XbcRfaOzcSsHZEith83KGlvaSG33u0SKu89Mj5h+T8V2hM+8O45Qc5XTgwA==
|
||||
|
||||
marked@^4.1.0:
|
||||
version "4.3.0"
|
||||
resolved "https://registry.yarnpkg.com/marked/-/marked-4.3.0.tgz#796362821b019f734054582038b116481b456cf3"
|
||||
integrity sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A==
|
||||
|
||||
markprompt@^0.1.7:
|
||||
version "0.1.7"
|
||||
resolved "https://registry.yarnpkg.com/markprompt/-/markprompt-0.1.7.tgz#fa049e11109d93372c45c38b3ca40bd5fdf751ea"
|
||||
@ -6978,7 +7070,7 @@ ms@2.1.2:
|
||||
resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009"
|
||||
integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==
|
||||
|
||||
ms@2.1.3:
|
||||
ms@2.1.3, ms@^2.1.1, ms@^2.1.3:
|
||||
version "2.1.3"
|
||||
resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2"
|
||||
integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==
|
||||
@ -7001,6 +7093,15 @@ napi-build-utils@^1.0.1:
|
||||
resolved "https://registry.yarnpkg.com/napi-build-utils/-/napi-build-utils-1.0.2.tgz#b1fddc0b2c46e380a0b7a76f984dd47c41a13806"
|
||||
integrity sha512-ONmRUqK7zj7DWX0D9ADe03wbwOBZxNAfF20PlGfCWQcD3+/MakShIHrMqx9YwPTfxDdF1zLeL+RGZiR9kGMLdg==
|
||||
|
||||
needle@^3.1.0, needle@^3.2.0:
|
||||
version "3.2.0"
|
||||
resolved "https://registry.yarnpkg.com/needle/-/needle-3.2.0.tgz#07d240ebcabfd65c76c03afae7f6defe6469df44"
|
||||
integrity sha512-oUvzXnyLiVyVGoianLijF9O/RecZUf7TkBfimjGrLM4eQhXyeJwM6GeAWccwfQ9aa4gMCZKqhAOuLaMIcQxajQ==
|
||||
dependencies:
|
||||
debug "^3.2.6"
|
||||
iconv-lite "^0.6.3"
|
||||
sax "^1.2.4"
|
||||
|
||||
negotiator@0.6.3:
|
||||
version "0.6.3"
|
||||
resolved "https://registry.yarnpkg.com/negotiator/-/negotiator-0.6.3.tgz#58e323a72fedc0d6f9cd4d31fe49f51479590ccd"
|
||||
@ -7753,6 +7854,11 @@ process-nextick-args@~2.0.0:
|
||||
resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2"
|
||||
integrity sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==
|
||||
|
||||
progress@^2.0.3:
|
||||
version "2.0.3"
|
||||
resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8"
|
||||
integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==
|
||||
|
||||
promise@^7.1.1:
|
||||
version "7.3.1"
|
||||
resolved "https://registry.yarnpkg.com/promise/-/promise-7.3.1.tgz#064b72602b18f90f29192b8b1bc418ffd1ebd3bf"
|
||||
@ -7805,16 +7911,16 @@ pump@^3.0.0:
|
||||
end-of-stream "^1.1.0"
|
||||
once "^1.3.1"
|
||||
|
||||
punycode@2.x.x, punycode@^2.1.0:
|
||||
version "2.3.0"
|
||||
resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.3.0.tgz#f67fa67c94da8f4d0cfff981aee4118064199b8f"
|
||||
integrity sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA==
|
||||
|
||||
punycode@^1.3.2:
|
||||
version "1.4.1"
|
||||
resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.4.1.tgz#c0d5a63b2718800ad8e1eb0fa5269c84dd41845e"
|
||||
integrity sha512-jmYNElW7yvO7TV33CjSmvSiE2yco3bV2czu/OzDKdMNVZQWfxCblURLhf+47syQRBntjfLdd/H0egrzIG+oaFQ==
|
||||
|
||||
punycode@^2.1.0:
|
||||
version "2.3.0"
|
||||
resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.3.0.tgz#f67fa67c94da8f4d0cfff981aee4118064199b8f"
|
||||
integrity sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA==
|
||||
|
||||
pupa@^2.1.1:
|
||||
version "2.1.1"
|
||||
resolved "https://registry.yarnpkg.com/pupa/-/pupa-2.1.1.tgz#f5e8fd4afc2c5d97828faa523549ed8744a20d62"
|
||||
@ -8789,7 +8895,7 @@ safe-buffer@5.2.1, safe-buffer@>=5.1.0, safe-buffer@^5.0.1, safe-buffer@^5.1.0,
|
||||
resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6"
|
||||
integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==
|
||||
|
||||
"safer-buffer@>= 2.1.2 < 3":
|
||||
"safer-buffer@>= 2.1.2 < 3", "safer-buffer@>= 2.1.2 < 3.0.0":
|
||||
version "2.1.2"
|
||||
resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a"
|
||||
integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==
|
||||
|
||||
@ -100,10 +100,9 @@ Currently, there are various models in GMS:
|
||||
1. [Urn](https://github.com/datahub-project/datahub/blob/master/li-utils/src/main/pegasus/com/linkedin/common/DatasetUrn.pdl) - Structs composing primary keys
|
||||
2. [Root] [Snapshots](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot/Snapshot.pdl) - Container of aspects
|
||||
3. [Aspects](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/DashboardAspect.pdl) - Optional container of fields
|
||||
4. [Values](https://github.com/datahub-project/datahub/blob/master/gms/api/src/main/pegasus/com/linkedin/dataset/Dataset.pdl), [Keys](https://github.com/datahub-project/datahub/blob/master/gms/api/src/main/pegasus/com/linkedin/dataset/DatasetKey.pdl) - Model returned by GMS [Rest.li](http://rest.li) API (public facing)
|
||||
5. [Entities](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DatasetEntity.pdl) - Records with fields derived from the URN. Used only in graph / relationships
|
||||
6. [Relationships](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Relationship.pdl) - Edges between 2 entities with optional edge properties
|
||||
7. [Search Documents](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/search/ChartDocument.pdl) - Flat documents for indexing within Elastic index
|
||||
4. [Keys](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DatasetKey.pdl) - Model returned by GMS [Rest.li](http://rest.li) API (public facing)
|
||||
5. [Relationships](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/common/EntityRelationship.pdl) - Edges between 2 entities with optional edge properties
|
||||
6. Search Documents - Flat documents for indexing within Elastic index
|
||||
- And corresponding index [mappings.json](https://github.com/datahub-project/datahub/blob/master/gms/impl/src/main/resources/index/chart/mappings.json), [settings.json](https://github.com/datahub-project/datahub/blob/master/gms/impl/src/main/resources/index/chart/settings.json)
|
||||
|
||||
Various components of GMS depend on / make assumptions about these model types:
|
||||
|
||||
@ -68,7 +68,7 @@ In the request body, select the `GraphQL` option and enter your GraphQL query in
|
||||
</p>
|
||||
|
||||
|
||||
Please refer to [Querying with GraphQL](https://learning.postman.com/docs/sending-requests/graphql/graphql/) in the Postman documentation for more information.
|
||||
Please refer to [Querying with GraphQL](https://learning.postman.com/docs/sending-requests/graphql/graphql-overview/) in the Postman documentation for more information.
|
||||
|
||||
### Authentication + Authorization
|
||||
|
||||
|
||||
@ -17,7 +17,7 @@ The figures below describe the high-level architecture of DataHub.
|
||||
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/DataHub-Architecture.png"/>
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/saas/DataHub-Architecture.png"/>
|
||||
</p>
|
||||
|
||||
|
||||
|
||||
@ -19,13 +19,13 @@ To do so, navigate to the **Users & Groups** section inside of Settings page. He
|
||||
do not have the correct privileges to invite users, this button will be disabled.
|
||||
|
||||
<p align="center">
|
||||
<img width="100%" src="https://raw.githubusercontent.com/datahub-project/datahub/master/docs/imgs/invite-users-button.png"/>
|
||||
<img width="100%" src="https://raw.githubusercontent.com/datahub-project/static-assets/master/imgs/invite-users-button.png"/>
|
||||
</p>
|
||||
|
||||
To invite new users, simply share the link with others inside your organization.
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/datahub/master/docs/imgs/invite-users-popup.png"/>
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/master/imgs/invite-users-popup.png"/>
|
||||
</p>
|
||||
|
||||
When a new user visits the link, they will be directed to a sign up screen where they can create their DataHub account.
|
||||
@ -37,13 +37,13 @@ and click **Reset user password** inside the menu dropdown on the right hand sid
|
||||
`Manage User Credentials` [Platform Privilege](../../authorization/access-policies-guide.md) in order to reset passwords.
|
||||
|
||||
<p align="center">
|
||||
<img width="100%" src="https://raw.githubusercontent.com/datahub-project/datahub/master/docs/imgs/reset-user-password-button.png"/>
|
||||
<img width="100%" src="https://raw.githubusercontent.com/datahub-project/static-assets/master/imgs/reset-user-password-button.png"/>
|
||||
</p>
|
||||
|
||||
To reset the password, simply share the password reset link with the user who needs to change their password. Password reset links expire after 24 hours.
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/datahub/master/docs/imgs/reset-user-password-popup.png"/>
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/master/imgs/reset-user-password-popup.png"/>
|
||||
</p>
|
||||
|
||||
# Configuring Single Sign-On with OpenID Connect
|
||||
|
||||
@ -26,7 +26,7 @@ please see [this guide](../jaas.md) to mount a custom user.props file for a JAAS
|
||||
To configure OIDC in React, you will most often need to register yourself as a client with your identity provider (Google, Okta, etc). Each provider may
|
||||
have their own instructions. Provided below are links to examples for Okta, Google, Azure AD, & Keycloak.
|
||||
|
||||
- [Registering an App in Okta](https://developer.okta.com/docs/guides/add-an-external-idp/apple/register-app-in-okta/)
|
||||
- [Registering an App in Okta](https://developer.okta.com/docs/guides/add-an-external-idp/openidconnect/main/)
|
||||
- [OpenID Connect in Google Identity](https://developers.google.com/identity/protocols/oauth2/openid-connect)
|
||||
- [OpenID Connect authentication with Azure Active Directory](https://docs.microsoft.com/en-us/azure/active-directory/fundamentals/auth-oidc)
|
||||
- [Keycloak - Securing Applications and Services Guide](https://www.keycloak.org/docs/latest/securing_apps/)
|
||||
|
||||
@ -547,7 +547,7 @@ Old Entities Migrated = {'urn:li:dataset:(urn:li:dataPlatform:hive,logging_event
|
||||
### Using docker
|
||||
|
||||
[](https://hub.docker.com/r/acryldata/datahub-ingestion)
|
||||
[](https://github.com/acryldata/datahub/actions/workflows/docker-ingestion.yml)
|
||||
[](https://github.com/acryldata/datahub/actions/workflows/docker-ingestion.yml)
|
||||
|
||||
If you don't want to install locally, you can alternatively run metadata ingestion within a Docker container.
|
||||
We have prebuilt images available on [Docker hub](https://hub.docker.com/r/acryldata/datahub-ingestion). All plugins will be installed and enabled automatically.
|
||||
|
||||
@ -22,20 +22,20 @@ You can create this privileges by creating a new [Metadata Policy](./authorizati
|
||||
To create a Domain, first navigate to the **Domains** tab in the top-right menu of DataHub.
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/datahub/master/docs/imgs/domains-tab.png"/>
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/master//imgs/domains-tab.png"/>
|
||||
</p>
|
||||
|
||||
Once you're on the Domains page, you'll see a list of all the Domains that have been created on DataHub. Additionally, you can
|
||||
view the number of entities inside each Domain.
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/datahub/master/docs/imgs/list-domains.png"/>
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/master/imgs/list-domains.png"/>
|
||||
</p>
|
||||
|
||||
To create a new Domain, click '+ New Domain'.
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/datahub/master/docs/imgs/create-domain.png"/>
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/master//imgs/create-domain.png"/>
|
||||
</p>
|
||||
|
||||
Inside the form, you can choose a name for your Domain. Most often, this will align with your business units or groups, for example
|
||||
@ -48,7 +48,7 @@ for the Domain. This option is useful if you intend to refer to Domains by a com
|
||||
key to be human-readable. Proceed with caution: once you select a custom id, it cannot be easily changed.
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/datahub/master/docs/imgs/set-domain-id.png"/>
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/master//imgs/set-domain-id.png"/>
|
||||
</p>
|
||||
|
||||
By default, you don't need to worry about this. DataHub will auto-generate a unique Domain id for you.
|
||||
@ -64,7 +64,7 @@ To assign an asset to a Domain, simply navigate to the asset's profile page. At
|
||||
see a 'Domain' section. Click 'Set Domain', and then search for the Domain you'd like to add to. When you're done, click 'Add'.
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/datahub/master/docs/imgs/set-domain.png"/>
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/master//imgs/set-domain.png"/>
|
||||
</p>
|
||||
|
||||
To remove an asset from a Domain, click the 'x' icon on the Domain tag.
|
||||
@ -149,27 +149,27 @@ source:
|
||||
Once you've created a Domain, you can use the search bar to find it.
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/datahub/master/docs/imgs/search-domain.png"/>
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/master//imgs/search-domain.png"/>
|
||||
</p>
|
||||
|
||||
Clicking on the search result will take you to the Domain's profile, where you
|
||||
can edit its description, add / remove owners, and view the assets inside the Domain.
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/datahub/master/docs/imgs/domain-entities.png"/>
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/master//imgs/domain-entities.png"/>
|
||||
</p>
|
||||
|
||||
Once you've added assets to a Domain, you can filter search results to limit to those Assets
|
||||
within a particular Domain using the left-side search filters.
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/datahub/master/docs/imgs/search-by-domain.png"/>
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/master//imgs/search-by-domain.png"/>
|
||||
</p>
|
||||
|
||||
On the homepage, you'll also find a list of the most popular Domains in your organization.
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/datahub/master/docs/imgs/browse-domains.png"/>
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/master//imgs/browse-domains.png"/>
|
||||
</p>
|
||||
|
||||
## Additional Resources
|
||||
@ -242,7 +242,6 @@ DataHub supports Tags, Glossary Terms, & Domains as distinct types of Metadata t
|
||||
- **Tags**: Informal, loosely controlled labels that serve as a tool for search & discovery. Assets may have multiple tags. No formal, central management.
|
||||
- **Glossary Terms**: A controlled vocabulary, with optional hierarchy. Terms are typically used to standardize types of leaf-level attributes (i.e. schema fields) for governance. E.g. (EMAIL_PLAINTEXT)
|
||||
- **Domains**: A set of top-level categories. Usually aligned to business units / disciplines to which the assets are most relevant. Central or distributed management. Single Domain assignment per data asset.
|
||||
|
||||
*Need more help? Join the conversation in [Slack](http://slack.datahubproject.io)!*
|
||||
|
||||
### Related Features
|
||||
|
||||
@ -1,20 +1,20 @@
|
||||
# How to add a new metadata aspect?
|
||||
|
||||
Adding a new metadata [aspect](../what/aspect.md) is one of the most common ways to extend an existing [entity](../what/entity.md).
|
||||
We'll use the [CorpUserEditableInfo](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/identity/CorpUserEditableInfo.pdl) as an example here.
|
||||
We'll use the CorpUserEditableInfo as an example here.
|
||||
|
||||
1. Add the aspect model to the corresponding namespace (e.g. [`com.linkedin.identity`](https://github.com/datahub-project/datahub/tree/master/metadata-models/src/main/pegasus/com/linkedin/identity))
|
||||
|
||||
2. Extend the entity's aspect union to include the new aspect (e.g. [`CorpUserAspect`](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/CorpUserAspect.pdl))
|
||||
2. Extend the entity's aspect union to include the new aspect.
|
||||
|
||||
3. Rebuild the rest.li [IDL & snapshot](https://linkedin.github.io/rest.li/modeling/compatibility_check) by running the following command from the project root
|
||||
```
|
||||
./gradlew :metadata-service:restli-servlet-impl:build -Prest.model.compatibility=ignore
|
||||
```
|
||||
|
||||
4. To surface the new aspect at the top-level [resource endpoint](https://linkedin.github.io/rest.li/user_guide/restli_server#writing-resources), extend the resource data model (e.g. [`CorpUser`](https://github.com/datahub-project/datahub/blob/master/gms/api/src/main/pegasus/com/linkedin/identity/CorpUser.pdl)) with an optional field (e.g. [`editableInfo`](https://github.com/datahub-project/datahub/blob/master/gms/api/src/main/pegasus/com/linkedin/identity/CorpUser.pdl#L21)). You'll also need to extend the `toValue` & `toSnapshot` methods of the top-level resource (e.g. [`CorpUsers`](https://github.com/datahub-project/datahub/blob/master/gms/impl/src/main/java/com/linkedin/metadata/resources/identity/CorpUsers.java)) to convert between the snapshot & value models.
|
||||
4. To surface the new aspect at the top-level [resource endpoint](https://linkedin.github.io/rest.li/user_guide/restli_server#writing-resources), extend the resource data model with an optional field. You'll also need to extend the `toValue` & `toSnapshot` methods of the top-level resource (e.g. [`CorpUsers`](https://github.com/datahub-project/datahub/blob/master/gms/impl/src/main/java/com/linkedin/metadata/resources/identity/CorpUsers.java)) to convert between the snapshot & value models.
|
||||
|
||||
5. (Optional) If there's need to update the aspect via API (instead of/in addition to MCE), add a [sub-resource](https://linkedin.github.io/rest.li/user_guide/restli_server#sub-resources) endpoint for the new aspect (e.g. [`CorpUsersEditableInfoResource`](https://github.com/datahub-project/datahub/blob/master/gms/impl/src/main/java/com/linkedin/metadata/resources/identity/CorpUsersEditableInfoResource.java)). The sub-resource endpoint also allows you to retrieve previous versions of the aspect as well as additional metadata such as the audit stamp.
|
||||
5. (Optional) If there's need to update the aspect via API (instead of/in addition to MCE), add a [sub-resource](https://linkedin.github.io/rest.li/user_guide/restli_server#sub-resources) endpoint for the new aspect (e.g. `CorpUsersEditableInfoResource`). The sub-resource endpoint also allows you to retrieve previous versions of the aspect as well as additional metadata such as the audit stamp.
|
||||
|
||||
6. After rebuilding & restarting [gms](https://github.com/datahub-project/datahub/tree/master/gms), [mce-consumer-job](https://github.com/datahub-project/datahub/tree/master/metadata-jobs/mce-consumer-job) & [mae-consumer-job](https://github.com/datahub-project/datahub/tree/master/metadata-jobs/mae-consumer-job),
|
||||
6. After rebuilding & restarting gms, [mce-consumer-job](https://github.com/datahub-project/datahub/tree/master/metadata-jobs/mce-consumer-job) & [mae-consumer-job](https://github.com/datahub-project/datahub/tree/master/metadata-jobs/mae-consumer-job),
|
||||
you should be able to start emitting [MCE](../what/mxe.md) with the new aspect and have it automatically ingested & stored in DB.
|
||||
|
||||
@ -24,7 +24,7 @@ We will refer to the two options as the **open-source fork** and **custom reposi
|
||||
## This Guide
|
||||
|
||||
This guide will outline what the experience of adding a new Entity should look like through a real example of adding the
|
||||
Dashboard Entity. If you want to extend an existing Entity, you can skip directly to [Step 3](#step_3).
|
||||
Dashboard Entity. If you want to extend an existing Entity, you can skip directly to [Step 3](#step-3-define-custom-aspects-or-attach-existing-aspects-to-your-entity).
|
||||
|
||||
At a high level, an entity is made up of:
|
||||
|
||||
@ -82,14 +82,14 @@ Because they are aspects, keys need to be annotated with an @Aspect annotation,
|
||||
can be a part of.
|
||||
|
||||
The key can also be annotated with the two index annotations: @Relationship and @Searchable. This instructs DataHub
|
||||
infra to use the fields in the key to create relationships and index fields for search. See [Step 3](#step_3) for more details on
|
||||
infra to use the fields in the key to create relationships and index fields for search. See [Step 3](#step-3-define-custom-aspects-or-attach-existing-aspects-to-your-entity) for more details on
|
||||
the annotation model.
|
||||
|
||||
**Constraints**: Note that each field in a Key Aspect MUST be of String or Enum type.
|
||||
|
||||
### <a name="step_2"></a>Step 2: Create the new entity with its key aspect
|
||||
|
||||
Define the entity within an `entity-registry.yml` file. Depending on your approach, the location of this file may vary. More on that in steps [4](#step_4) and [5](#step_5).
|
||||
Define the entity within an `entity-registry.yml` file. Depending on your approach, the location of this file may vary. More on that in steps [4](#step-4-choose-a-place-to-store-your-model-extension) and [5](#step-5-attaching-your-non-key-aspects-to-the-entity).
|
||||
|
||||
Example:
|
||||
```yaml
|
||||
@ -212,11 +212,11 @@ After you create your Aspect, you need to attach to all the entities that it app
|
||||
|
||||
**Constraints**: Note that all aspects MUST be of type Record.
|
||||
|
||||
### <a name="step_4"></a> Step 4: Choose a place to store your model extension
|
||||
### <a name="step_4"></a>Step 4: Choose a place to store your model extension
|
||||
|
||||
At the beginning of this document, we walked you through a flow-chart that should help you decide whether you need to maintain a fork of the open source DataHub repo for your model extensions, or whether you can just use a model extension repository that can stay independent of the DataHub repo. Depending on what path you took, the place you store your aspect model files (the .pdl files) and the entity-registry files (the yaml file called `entity-registry.yaml` or `entity-registry.yml`) will vary.
|
||||
|
||||
- Open source Fork: Aspect files go under [`metadata-models`](../../metadata-models) module in the main repo, entity registry goes into [`metadata-models/src/main/resources/entity-registry.yml`](../../metadata-models/src/main/resources/entity-registry.yml). Read on for more details in [Step 5](#step_5).
|
||||
- Open source Fork: Aspect files go under [`metadata-models`](../../metadata-models) module in the main repo, entity registry goes into [`metadata-models/src/main/resources/entity-registry.yml`](../../metadata-models/src/main/resources/entity-registry.yml). Read on for more details in [Step 5](#step-5-attaching-your-non-key-aspects-to-the-entity).
|
||||
- Custom repository: Read the [metadata-models-custom](../../metadata-models-custom/README.md) documentation to learn how to store and version your aspect models and registry.
|
||||
|
||||
### <a name="step_5"></a>Step 5: Attaching your non-key Aspect(s) to the Entity
|
||||
|
||||
@ -433,7 +433,7 @@ aggregation query against a timeseries aspect.
|
||||
The *@TimeseriesField* and the *@TimeseriesFieldCollection* are two new annotations that can be attached to a field of
|
||||
a *Timeseries aspect* that allows it to be part of an aggregatable query. The kinds of aggregations allowed on these
|
||||
annotated fields depends on the type of the field, as well as the kind of aggregation, as
|
||||
described [here](#Performing-an-aggregation-on-a-Timeseries-aspect).
|
||||
described [here](#performing-an-aggregation-on-a-timeseries-aspect).
|
||||
|
||||
* `@TimeseriesField = {}` - this annotation can be used with any type of non-collection type field of the aspect such as
|
||||
primitive types and records (see the fields *stat*, *strStat* and *strArray* fields
|
||||
@ -515,7 +515,7 @@ my_emitter = DatahubRestEmitter("http://localhost:8080")
|
||||
my_emitter.emit(mcpw)
|
||||
```
|
||||
|
||||
###### Performing an aggregation on a Timeseries aspect.
|
||||
###### Performing an aggregation on a Timeseries aspect
|
||||
|
||||
Aggregations on timeseries aspects can be performed by the GMS REST API for `/analytics?action=getTimeseriesStats` which
|
||||
accepts the following params.
|
||||
|
||||
10
docs/tags.md
10
docs/tags.md
@ -27,25 +27,25 @@ You can create these privileges by creating a new [Metadata Policy](./authorizat
|
||||
To add a tag at the dataset or container level, simply navigate to the page for that entity and click on the **Add Tag** button.
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/datahub/master/docs/imgs/add-tag.png"/>
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/master/imgs/add-tag.png"/>
|
||||
</p>
|
||||
|
||||
Type in the name of the tag you want to add. You can add a new tag, or add a tag that already exists (the autocomplete will pull up the tag if it already exists).
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/datahub/master/docs/imgs/add-tag-search.png"/>
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/master/imgs/add-tag-search.png"/>
|
||||
</p>
|
||||
|
||||
Click on the "Add" button and you'll see the tag has been added!
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/datahub/master/docs/imgs/added-tag.png"/>
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/master/imgs/added-tag.png"/>
|
||||
</p>
|
||||
|
||||
If you would like to add a tag at the schema level, hover over the "Tags" column for a schema until the "Add Tag" button shows up, and then follow the same flow as above.
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/datahub/master/docs/imgs/add-schema-tag.png"/>
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/master/imgs/add-schema-tag.png"/>
|
||||
</p>
|
||||
|
||||
### Removing a Tag
|
||||
@ -57,7 +57,7 @@ To remove a tag, simply click on the "X" button in the tag. Then click "Yes" whe
|
||||
You can search for a tag in the search bar, and even filter entities by the presence of a specific tag.
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/datahub/master/docs/imgs/search-tag.png"/>
|
||||
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/master/imgs/search-tag.png"/>
|
||||
</p>
|
||||
|
||||
## Additional Resources
|
||||
|
||||
@ -328,7 +328,7 @@ November Town Hall (in December!)
|
||||
|
||||
* Welcome - 5 mins
|
||||
* Latest React App Demo! ([video](https://www.youtube.com/watch?v=RQBEJhcen5E)) by John Joyce and Gabe Lyons - 5 mins
|
||||
* Use-Case: DataHub at Geotab ([slides](https://docs.google.com/presentation/d/1qcgO3BW5NauuG0HnPqrxGcujsK-rJ1-EuU-7cbexkqE/edit?usp=sharing),[video](https://www.youtube.com/watch?v=boyjT2OrlU4)) by [John Yoon](https://www.linkedin.com/in/yhjyoon/) - 15 mins
|
||||
* Use-Case: DataHub at Geotab ([video](https://www.youtube.com/watch?v=boyjT2OrlU4)) by [John Yoon](https://www.linkedin.com/in/yhjyoon/) - 15 mins
|
||||
* Tech Deep Dive: Tour of new pull-based Python Ingestion scripts ([slides](https://docs.google.com/presentation/d/15Xay596WDIhzkc5c8DEv6M-Bv1N4hP8quup1tkws6ms/edit#slide=id.gb478361595_0_10),[video](https://www.youtube.com/watch?v=u0IUQvG-_xI)) by [Harshal Sheth](https://www.linkedin.com/in/hsheth2/) - 15 mins
|
||||
* General Q&A from sign up sheet, slack, and participants - 15 mins
|
||||
* Closing remarks - 5 mins
|
||||
|
||||
@ -2,6 +2,4 @@
|
||||
|
||||
Metadata for [entities](entity.md) [onboarded](../modeling/metadata-model.md) to [GMA](gma.md) is served through microservices known as Generalized Metadata Service (GMS). GMS typically provides a [Rest.li](http://rest.li) API and must access the metadata using [GMA DAOs](../architecture/metadata-serving.md).
|
||||
|
||||
While a GMS is completely free to define its public APIs, we do provide a list of [resource base classes](https://github.com/datahub-project/datahub-gma/tree/master/restli-resources/src/main/java/com/linkedin/metadata/restli) to leverage for common patterns.
|
||||
|
||||
GMA is designed to support a distributed fleet of GMS, each serving a subset of the [GMA graph](graph.md). However, for simplicity we include a single centralized GMS ([datahub-gms](../../gms)) that serves all entities.
|
||||
GMA is designed to support a distributed fleet of GMS, each serving a subset of the [GMA graph](graph.md). However, for simplicity we include a single centralized GMS that serves all entities.
|
||||
|
||||
@ -266,7 +266,7 @@ A Metadata Change Event represents a request to change multiple aspects for the
|
||||
It leverages a deprecated concept of `Snapshot`, which is a strongly-typed list of aspects for the same
|
||||
entity.
|
||||
|
||||
A MCE is a "proposal" for a set of metadata changes, as opposed to [MAE](#metadata-audit-event), which is conveying a committed change.
|
||||
A MCE is a "proposal" for a set of metadata changes, as opposed to [MAE](#metadata-audit-event-mae), which is conveying a committed change.
|
||||
Consequently, only successfully accepted and processed MCEs will lead to the emission of a corresponding MAE / MCLs.
|
||||
|
||||
### Emission
|
||||
|
||||
@ -102,9 +102,6 @@ For one, the actual direction doesn’t really impact the execution of graph que
|
||||
|
||||
That being said, generally there’s a more "natural way" to specify the direction of a relationship, which closely relates to how the metadata is stored. For example, the membership information for an LDAP group is generally stored as a list in group’s metadata. As a result, it’s more natural to model a `HasMember` relationship that points from a group to a member, instead of a `IsMemberOf` relationship pointing from member to group.
|
||||
|
||||
Since all relationships are explicitly declared, it’s fairly easy for a user to discover what relationships are available and their directionality by inspecting
|
||||
the [relationships directory](../../metadata-models/src/main/pegasus/com/linkedin/metadata/relationship). It’s also possible to provide a UI for the catalog of entities and relationships for analysts who are interested in building complex graph queries to gain insights into the metadata.
|
||||
|
||||
## High Cardinality Relationships
|
||||
|
||||
See [this doc](../advanced/high-cardinality.md) for suggestions on how to best model relationships with high cardinality.
|
||||
|
||||
@ -13,7 +13,6 @@ As a result, one may be tempted to add as many attributes as needed. This is acc
|
||||
Below shows an example schema for the `User` search document. Note that:
|
||||
1. Each search document is required to have a type-specific `urn` field, generally maps to an entity in the [graph](graph.md).
|
||||
2. Similar to `Entity`, each document has an optional `removed` field for "soft deletion".
|
||||
This is captured in [BaseDocument](../../metadata-models/src/main/pegasus/com/linkedin/metadata/search/BaseDocument.pdl), which is expected to be included by all documents.
|
||||
3. Similar to `Entity`, all remaining fields are made `optional` to support partial updates.
|
||||
4. `management` shows an example of a string array field.
|
||||
5. `ownedDataset` shows an example on how a field can be derived from metadata [aspects](aspect.md) associated with other types of entity (in this case, `Dataset`).
|
||||
|
||||
@ -60,16 +60,14 @@ class StaleEntityCheckpointStateBase(CheckpointStateBase, ABC, Generic[Derived])
|
||||
```
|
||||
|
||||
Examples:
|
||||
1. [KafkaCheckpointState](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/state/kafka_state.py#L11).
|
||||
2. [DbtCheckpointState](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/state/dbt_state.py#L16)
|
||||
3. [BaseSQLAlchemyCheckpointState](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/state/sql_common_state.py#L17)
|
||||
* [BaseSQLAlchemyCheckpointState](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/state/sql_common_state.py#L17)
|
||||
|
||||
### 2. Modifying the SourceConfig
|
||||
|
||||
The source's config must inherit from `StatefulIngestionConfigBase`, and should declare a field named `stateful_ingestion` of type `Optional[StatefulStaleMetadataRemovalConfig]`.
|
||||
|
||||
Examples:
|
||||
1. The `KafkaSourceConfig`
|
||||
- The `KafkaSourceConfig`
|
||||
```python
|
||||
from typing import List, Optional
|
||||
import pydantic
|
||||
@ -84,9 +82,6 @@ class KafkaSourceConfig(StatefulIngestionConfigBase):
|
||||
stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None
|
||||
```
|
||||
|
||||
2. The [DBTStatefulIngestionConfig](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/dbt.py#L131)
|
||||
and the [DBTConfig](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/dbt.py#L317).
|
||||
|
||||
### 3. Modifying the SourceReport
|
||||
The report class of the source should inherit from `StaleEntityRemovalSourceReport` whose definition is shown below.
|
||||
```python
|
||||
@ -102,7 +97,7 @@ class StaleEntityRemovalSourceReport(StatefulIngestionReport):
|
||||
```
|
||||
|
||||
Examples:
|
||||
1. The `KafkaSourceReport`
|
||||
* The `KafkaSourceReport`
|
||||
```python
|
||||
from dataclasses import dataclass
|
||||
from datahub.ingestion.source.state.stale_entity_removal_handler import StaleEntityRemovalSourceReport
|
||||
@ -110,7 +105,7 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import StaleEnt
|
||||
class KafkaSourceReport(StaleEntityRemovalSourceReport):
|
||||
# <rest of kafka source report specific impl>
|
||||
```
|
||||
2. [DBTSourceReport](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/dbt.py#L142)
|
||||
|
||||
### 4. Modifying the Source
|
||||
The source must inherit from `StatefulIngestionSourceBase`.
|
||||
|
||||
|
||||
@ -69,7 +69,7 @@ reporting:
|
||||
An ingestion reporting state provider is responsible for saving and retrieving the ingestion telemetry
|
||||
associated with the ingestion runs of various jobs inside the source connector of the ingestion pipeline.
|
||||
The data model used for capturing the telemetry is [DatahubIngestionRunSummary](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/datajob/datahub/DatahubIngestionRunSummary.pdl).
|
||||
A reporting ingestion state provider needs to implement the [IngestionReportingProviderBase](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/api/ingestion_job_reporting_provider_base.py)
|
||||
A reporting ingestion state provider needs to implement the IngestionReportingProviderBase.
|
||||
interface and register itself with datahub by adding an entry under `datahub.ingestion.reporting_provider.plugins`
|
||||
key of the entry_points section in [setup.py](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/setup.py)
|
||||
with its type and implementation class as shown below.
|
||||
|
||||
@ -22,14 +22,14 @@ noCode: "true"
|
||||
|
||||
Note that a `.` is used to denote nested fields in the YAML recipe.
|
||||
|
||||
| Field | Required | Default | Description |
|
||||
|--------------------------------------------------------------| -------- |------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `source.config.stateful_ingestion.enabled` | | False | The type of the ingestion state provider registered with datahub. |
|
||||
| `source.config.stateful_ingestion.ignore_old_state` | | False | If set to True, ignores the previous checkpoint state. |
|
||||
| `source.config.stateful_ingestion.ignore_new_state` | | False | If set to True, ignores the current checkpoint state. |
|
||||
| `source.config.stateful_ingestion.max_checkpoint_state_size` | | 2^24 (16MB) | The maximum size of the checkpoint state in bytes. |
|
||||
| `source.config.stateful_ingestion.state_provider` | | The default [datahub ingestion state provider](#datahub-ingestion-state-provider) configuration. | The ingestion state provider configuration. |
|
||||
| `pipeline_name` | ✅ | | The name of the ingestion pipeline the checkpoint states of various source connector job runs are saved/retrieved against via the ingestion state provider. |
|
||||
| Field | Required | Default | Description |
|
||||
|--------------------------------------------------------------| -------- |-----------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `source.config.stateful_ingestion.enabled` | | False | The type of the ingestion state provider registered with datahub. |
|
||||
| `source.config.stateful_ingestion.ignore_old_state` | | False | If set to True, ignores the previous checkpoint state. |
|
||||
| `source.config.stateful_ingestion.ignore_new_state` | | False | If set to True, ignores the current checkpoint state. |
|
||||
| `source.config.stateful_ingestion.max_checkpoint_state_size` | | 2^24 (16MB) | The maximum size of the checkpoint state in bytes. |
|
||||
| `source.config.stateful_ingestion.state_provider` | | The default datahub ingestion state provider configuration. | The ingestion state provider configuration. |
|
||||
| `pipeline_name` | ✅ | | The name of the ingestion pipeline the checkpoint states of various source connector job runs are saved/retrieved against via the ingestion state provider. |
|
||||
|
||||
NOTE: If either `dry-run` or `preview` mode are set, stateful ingestion will be turned off regardless of the rest of the configuration.
|
||||
## Use-cases powered by stateful ingestion.
|
||||
|
||||
@ -9,8 +9,8 @@ and uses DataHub S3 Data Lake integration source under the hood. Refer section [
|
||||
This ingestion source maps the following Source System Concepts to DataHub Concepts:
|
||||
|
||||
| Source Concept | DataHub Concept | Notes |
|
||||
| ------------------------------------------ | ------------------------------------------------------------------------------------------ | -------------------- |
|
||||
| `"Google Cloud Storage"` | [Data Platform](https://datahubproject.io/docs/generated/metamodel/entities/dataPlatform/) | |
|
||||
| ------------------------------------------ |--------------------------------------------------------------------------------------------| -------------------- |
|
||||
| `"Google Cloud Storage"` | [Data Platform](https://datahubproject.io/docs/generated/metamodel/entities/dataplatform/) | |
|
||||
| GCS object / Folder containing GCS objects | [Dataset](https://datahubproject.io/docs/generated/metamodel/entities/dataset/) | |
|
||||
| GCS bucket | [Container](https://datahubproject.io/docs/generated/metamodel/entities/container/) | Subtype `GCS bucket` |
|
||||
| GCS folder | [Container](https://datahubproject.io/docs/generated/metamodel/entities/container/) | Subtype `Folder` |
|
||||
|
||||
@ -10,11 +10,11 @@ This plugin extracts the following:
|
||||
|
||||
This ingestion source maps the following Source System Concepts to DataHub Concepts:
|
||||
|
||||
| Source Concept | DataHub Concept | Notes |
|
||||
| --------------------------- | ------------------------------------------------------------- | --------------------------------------------------------------------------- |
|
||||
| `"kafka-connect"` | [Data Platform](https://datahubproject.io/docs/generated/metamodel/entities/dataPlatform/) | |
|
||||
| [Connector](https://kafka.apache.org/documentation/#connect_connectorsandtasks) | [DataFlow](https://datahubproject.io/docs/generated/metamodel/entities/dataflow/) | |
|
||||
| Kafka Topic | [Dataset](https://datahubproject.io/docs/generated/metamodel/entities/dataset/) | |
|
||||
| Source Concept | DataHub Concept | Notes |
|
||||
| --------------------------- |--------------------------------------------------------------------------------------------| --------------------------------------------------------------------------- |
|
||||
| `"kafka-connect"` | [Data Platform](https://datahubproject.io/docs/generated/metamodel/entities/dataplatform/) | |
|
||||
| [Connector](https://kafka.apache.org/documentation/#connect_connectorsandtasks) | [DataFlow](https://datahubproject.io/docs/generated/metamodel/entities/dataflow/) | |
|
||||
| Kafka Topic | [Dataset](https://datahubproject.io/docs/generated/metamodel/entities/dataset/) | |
|
||||
|
||||
## Current limitations
|
||||
|
||||
|
||||
@ -6,8 +6,8 @@ To specify the group of files that form a dataset, use `path_specs` configuratio
|
||||
This ingestion source maps the following Source System Concepts to DataHub Concepts:
|
||||
|
||||
| Source Concept | DataHub Concept | Notes |
|
||||
| ---------------------------------------- | ------------------------------------------------------------------------------------------ | ------------------- |
|
||||
| `"s3"` | [Data Platform](https://datahubproject.io/docs/generated/metamodel/entities/dataPlatform/) | |
|
||||
| ---------------------------------------- |--------------------------------------------------------------------------------------------| ------------------- |
|
||||
| `"s3"` | [Data Platform](https://datahubproject.io/docs/generated/metamodel/entities/dataplatform/) | |
|
||||
| s3 object / Folder containing s3 objects | [Dataset](https://datahubproject.io/docs/generated/metamodel/entities/dataset/) | |
|
||||
| s3 bucket | [Container](https://datahubproject.io/docs/generated/metamodel/entities/container/) | Subtype `S3 bucket` |
|
||||
| s3 folder | [Container](https://datahubproject.io/docs/generated/metamodel/entities/container/) | Subtype `Folder` |
|
||||
|
||||
@ -2,4 +2,4 @@
|
||||
|
||||
This script sets up a transformer that reads in a list of owner URNs from a JSON file specified via `owners_json` and appends these owners to every MCE.
|
||||
|
||||
See the transformers tutorial (https://datahubproject.io/docs/metadata-ingestion/transformers) for how this module is built and run.
|
||||
See the transformers tutorial (https://datahubproject.io/docs/metadata-ingestion/docs/transformer/intro) for how this module is built and run.
|
||||
|
||||
@ -112,9 +112,6 @@ class TrinoUsageSource(Source):
|
||||
|
||||
#### Prerequisites
|
||||
1. You need to setup Event Logger which saves audit logs into a Postgres db and setup this db as a catalog in Trino
|
||||
Here you can find more info about how to setup:
|
||||
https://docs.starburst.io/354-e/security/event-logger.html#security-event-logger--page-root
|
||||
https://docs.starburst.io/354-e/security/event-logger.html#analyzing-the-event-log
|
||||
|
||||
2. Install starbust-trino-usage plugin
|
||||
Run pip install 'acryl-datahub[starburst-trino-usage]'.
|
||||
|
||||
@ -2,9 +2,9 @@
|
||||
DataHub uses Kafka as the pub-sub message queue in the backend. There are 2 Kafka topics used by DataHub which are
|
||||
`MetadataChangeEvent` and `MetadataAuditEvent`.
|
||||
* `MetadataChangeEvent:` This message is emitted by any data platform or crawler in which there is a change in the metadata.
|
||||
* `MetadataAuditEvent:` This message is emitted by [DataHub GMS](../gms) to notify that metadata change is registered.
|
||||
* `MetadataAuditEvent:` This message is emitted by [DataHub GMS](../metadata-service/README.md) to notify that metadata change is registered.
|
||||
|
||||
To be able to consume from these two topics, there are two Spring
|
||||
jobs DataHub uses:
|
||||
* [MCE Consumer Job](mce-consumer-job): Writes to [DataHub GMS](../gms)
|
||||
* [MCE Consumer Job](mce-consumer-job): Writes to [DataHub GMS](../metadata-service/README.md)
|
||||
* [MAE Consumer Job](mae-consumer-job): Writes to [Elasticsearch](../docker/elasticsearch) & [Neo4j](../docker/neo4j)
|
||||
|
||||
@ -6,6 +6,4 @@ Examples of data platforms are `redshift`, `hive`, `bigquery`, `looker`, `tablea
|
||||
|
||||
## Identity
|
||||
|
||||
Data Platforms are identified by the name of the technology. A complete list of currently supported data platforms is available [here](https://raw.githubusercontent.com/datahub-project/datahub/master/metadata-service/restli-servlet-impl/src/main/resources/DataPlatformInfo.json).
|
||||
|
||||
|
||||
Data Platforms are identified by the name of the technology. A complete list of currently supported data platforms is available [here](https://raw.githubusercontent.com/datahub-project/datahub/master/metadata-service/war/src/main/resources/boot/data_platforms.json).
|
||||
Loading…
x
Reference in New Issue
Block a user