fix: refactor toc (#7862)

parent a3a8107b92
commit 2bc0a781a6
@@ -9,65 +9,43 @@ module.exports = {
overviewSidebar: [
{
"What is DataHub?": [
label: "Getting Started",
type: "category",
collapsed: true,
items: [
// By the end of this section, readers should understand the core use cases that DataHub addresses,
// target end-users, high-level architecture, & hosting options
"docs/features",
"docs/what-is-datahub/datahub-concepts",
{
type: "doc",
label: "Introduction",
id: "docs/features",
},
{
type: "doc",
label: "Quickstart",
id: "docs/quickstart",
},
{
type: "link",
label: "See Datahub In Action",
label: "Demo",
href: "https://demo.datahubproject.io/",
},
{
Architecture: [
"docs/architecture/architecture",
"docs/components",
"docs/architecture/metadata-ingestion",
"docs/architecture/metadata-serving",
],
},
"docs/what-is-datahub/datahub-concepts",
"docs/saas",
],
},
{
type: "category",
label: "Get Started",
collapsed: false,
// The goal of this section is to provide the bare-minimum steps required to:
// - Get DataHub Running
// - Optionally configure SSO
// - Add/invite Users
// - Create Policies & assign roles
// - Ingest at least one source (i.e. data warehouse)
// - Understand high-level options for enriching metadata
link: { type: "doc", id: "docs/get-started-with-datahub" },
items: [
Integrations: [
// The purpose of this section is to provide a deeper understanding of how ingestion works.
// Readers should be able to find details for ingesting from all systems, apply transformers, understand sinks,
// and understand key concepts of the Ingestion Framework (Sources, Sinks, Transformers, and Recipes)
{
"Self-Hosted DataHub": [
"docs/quickstart",
"docs/authentication/guides/add-users",
],
type: "doc",
label: "Introduction",
id: "metadata-ingestion/README",
},
{
"Managed DataHub": [
"docs/authentication/guides/add-users",
{
type: "doc",
id: "docs/managed-datahub/saas-slack-setup",
className: "saasOnly",
},
{
type: "doc",
id: "docs/managed-datahub/approval-workflows",
className: "saasOnly",
},
],
},
{
"Ingestion Quickstart Guides": [
"Quickstart Guides": [
{
BigQuery: [
"docs/quick-ingestion-guides/bigquery/overview",

@@ -105,17 +83,6 @@ module.exports = {
},
],
},
],
},
{
"Ingest Metadata": [
// The purpose of this section is to provide a deeper understanding of how ingestion works.
// Readers should be able to find details for ingesting from all systems, apply transformers, understand sinks,
// and understand key concepts of the Ingestion Framework (Sources, Sinks, Transformers, and Recipes)
{
Overview: ["metadata-ingestion/README", "docs/ui-ingestion"],
},
{
Sources: [
// collapse these; add push-based at top

@@ -171,73 +138,7 @@ module.exports = {
],
},
{
type: "category",
// The purpose of this section is to provide direction on how to enrich metadata when shift-left isn’t an option
label: "Enrich Metadata",
link: { type: "doc", id: "docs/enrich-metadata" },
items: [
"docs/domains",
"docs/glossary/business-glossary",
"docs/tags",
"docs/lineage/lineage-feature-guide",
],
},
{
type: "category",
label: "Act on Metadata",
link: { type: "doc", id: "docs/act-on-metadata" },
items: [
{
"Actions Framework": [
"docs/actions/README",
"docs/actions/quickstart",
"docs/actions/concepts",
{
Sources: [
{
type: "autogenerated",
dirName: "docs/actions/sources",
},
],
},
{
Events: [
{
type: "autogenerated",
dirName: "docs/actions/events",
},
],
},
{
Actions: [
{
type: "autogenerated",
dirName: "docs/actions/actions",
},
],
},
{
Guides: [
{
type: "autogenerated",
dirName: "docs/actions/guides",
},
],
},
],
},
{
type: "doc",
id: "docs/tests/metadata-tests",
className: "saasOnly",
},
"docs/act-on-metadata/impact-analysis",
],
},
{
"Deploy DataHub": [
Deployment: [
// The purpose of this section is to provide the minimum steps required to deploy DataHub to the vendor of your choosing
"docs/deploy/aws",
"docs/deploy/gcp",

@@ -247,6 +148,7 @@ module.exports = {
Authentication: [
"docs/authentication/README",
"docs/authentication/concepts",
"docs/authentication/guides/add-users",
{
"Frontend Authentication": [
"docs/authentication/guides/jaas",

@@ -290,9 +192,8 @@ module.exports = {
"docs/how/updating-datahub",
],
},
{
"DataHub API": [
API: [
"docs/api/datahub-apis",
{
"GraphQL API": [

@@ -367,20 +268,16 @@ module.exports = {
],
},
{
OpenAPI: [
{
label: "Usage Guide",
type: "doc",
id: "docs/api/openapi/openapi-usage-guide",
},
"docs/dev-guides/timeline",
],
type: "doc",
label: "OpenAPI",
id: "docs/api/openapi/openapi-usage-guide",
},
"docs/dev-guides/timeline",
{
"Rest.li": [
"Rest.li API": [
{
type: "doc",
label: "Rest.li API",
label: "Rest.li API Guide",
id: "docs/api/restli/restli-overview",
},
{

@@ -395,6 +292,20 @@ module.exports = {
},
],
},
{
"Python SDK": [
"metadata-ingestion/as-a-library",
{
"Python SDK Reference": [
{
type: "autogenerated",
dirName: "python-sdk",
},
],
},
],
},
"metadata-integration/java/as-a-library",
{
"Getting Started: APIs & SDKs": [
"docs/api/tutorials/modifying-dataset-tags",

@@ -408,31 +319,81 @@ module.exports = {
"docs/api/tutorials/modifying-dataset-custom-properties",
],
},
],
},
{
Tools: [
"docs/cli",
{
SDKs: [
"metadata-ingestion/as-a-library",
"metadata-integration/java/as-a-library",
],
type: "category",
label: "Datahub CLI",
items: ["docs/cli", "docs/datahub_lite"],
},
{
"Python SDK Reference": [
type: "category",
label: "Datahub Actions",
link: { type: "doc", id: "docs/act-on-metadata" },
items: [
"docs/actions/README",
"docs/actions/quickstart",
"docs/actions/concepts",
{
type: "autogenerated",
dirName: "python-sdk",
Sources: [
{
type: "autogenerated",
dirName: "docs/actions/sources",
},
],
},
{
Events: [
{
type: "autogenerated",
dirName: "docs/actions/events",
},
],
},
{
Actions: [
{
type: "autogenerated",
dirName: "docs/actions/actions",
},
],
},
{
Guides: [
{
type: "autogenerated",
dirName: "docs/actions/guides",
},
],
},
],
},
"docs/datahub_lite",
],
},
{
"Developer Guides": [
Features: [
"docs/ui-ingestion",
"docs/how/search",
"docs/schema-history",
// "docs/how/ui-tabs-guide",
"docs/domains",
"docs/glossary/business-glossary",
"docs/tags",
"docs/browse",
"docs/authorization/access-policies-guide",
"docs/features/dataset-usage-and-query-history",
"docs/posts",
"docs/sync-status",
"docs/architecture/stemming_and_synonyms",
"docs/lineage/lineage-feature-guide",
{
type: "doc",
id: "docs/tests/metadata-tests",
className: "saasOnly",
},
"docs/act-on-metadata/impact-analysis",
],
},
{
Develop: [
// The purpose of this section is to provide developers & technical users with
// concrete tutorials for how to work with the DataHub CLI & APIs
{

@@ -450,6 +411,14 @@ module.exports = {
},
],
},
{
Architecture: [
"docs/architecture/architecture",
"docs/components",
"docs/architecture/metadata-ingestion",
"docs/architecture/metadata-serving",
],
},
{
"Developing on DataHub": [
"docs/developers",

@@ -467,10 +436,14 @@ module.exports = {
},
],
},
"docs/debugging",
"docs/faq-using-datahub",
"docs/plugins",
{
Troubleshooting: [
"docs/troubleshooting/quickstart",
"docs/troubleshooting/build",
"docs/troubleshooting/general",
],
},
{
Advanced: [
"metadata-ingestion/docs/dev_guides/reporting_telemetry",

@@ -488,27 +461,8 @@ module.exports = {
},
],
},
{
"Feature Guides": [
"docs/how/search",
"docs/schema-history",
// "docs/how/ui-tabs-guide",
"docs/domains",
"docs/glossary/business-glossary",
"docs/tags",
"docs/browse",
"docs/authorization/access-policies-guide",
"docs/features/dataset-usage-and-query-history",
"docs/posts",
"docs/sync-status",
"docs/architecture/stemming_and_synonyms",
"docs/lineage/lineage-feature-guide",
],
},
{
"Join the Community": [
Community: [
"docs/slack",
"docs/townhalls",
"docs/townhall-history",

@@ -522,6 +476,16 @@ module.exports = {
"Managed DataHub": [
"docs/managed-datahub/managed-datahub-overview",
"docs/managed-datahub/welcome-acryl",
{
type: "doc",
id: "docs/managed-datahub/saas-slack-setup",
className: "saasOnly",
},
{
type: "doc",
id: "docs/managed-datahub/approval-workflows",
className: "saasOnly",
},
{
"Metadata Ingestion With Acryl": [
"docs/managed-datahub/metadata-ingestion-with-acryl/ingestion",

@@ -579,7 +543,6 @@ module.exports = {
id: "docs/managed-datahub/chrome-extension",
className: "saasOnly",
},
{
"Managed DataHub Release History": [
"docs/managed-datahub/release-notes/v_0_2_5",
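The sidebar diff above leans on a handful of Docusaurus sidebar item shapes. As a minimal sketch (the ids and labels here are illustrative examples drawn from the diff, not the final DataHub TOC):

```js
// Illustrative sidebars.js — not the verbatim final file.
module.exports = {
  overviewSidebar: [
    // Shorthand: a bare string is a doc id.
    "docs/features",
    // Explicit doc item, used when the sidebar label should differ from the page title.
    { type: "doc", label: "Quickstart", id: "docs/quickstart" },
    // External link item.
    { type: "link", label: "Demo", href: "https://demo.datahubproject.io/" },
    // Category with its own landing page (link) and nested children (items).
    {
      type: "category",
      label: "Act on Metadata",
      collapsed: true,
      link: { type: "doc", id: "docs/act-on-metadata" },
      // An autogenerated item pulls in every doc under a directory.
      items: [{ type: "autogenerated", dirName: "docs/actions/guides" }],
    },
  ],
};
```

The `{ "Category Name": [ ... ] }` objects seen throughout the diff are the shorthand form of a category item.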
@@ -154,7 +154,7 @@ const quickLinkContent = [
title: "Get Started",
icon: <ThunderboltTwoTone />,
description: "Details on how to get DataHub up and running",
to: "/docs/get-started-with-datahub",
to: "/docs/quickstart",
},
{
title: "Ingest Metadata",

@@ -163,10 +163,10 @@ const quickLinkContent = [
to: "/docs/metadata-ingestion",
},
{
title: "Enrich Metadata",
title: "API",
icon: <DeploymentUnitOutlined />,
description: "Improve the quality and coverage of Metadata",
to: "docs/enrich-metadata",
description: "Details on how to utilize Metadata programmatically",
to: "docs/api/datahub-apis",
},
{
title: "Act on Metadata",
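Consolidating the interleaved before/after lines, the reworked quick-link cards come out roughly as follows. A sketch only, assuming the icons are imported from `@ant-design/icons` as elsewhere in this file:

```js
// Sketch of the updated quickLinkContent entries, consolidated from the diff above.
import { ThunderboltTwoTone, DeploymentUnitOutlined } from "@ant-design/icons";

const quickLinkContent = [
  {
    title: "Get Started",
    icon: <ThunderboltTwoTone />,
    description: "Details on how to get DataHub up and running",
    to: "/docs/quickstart", // previously /docs/get-started-with-datahub
  },
  {
    title: "API", // previously "Enrich Metadata"
    icon: <DeploymentUnitOutlined />,
    description: "Details on how to utilize Metadata programmatically",
    to: "docs/api/datahub-apis", // previously docs/enrich-metadata
  },
];
```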
@@ -285,3 +285,18 @@ div[class^="announcementBar"] {
--docsearch-footer-background: var(--ifm-background-surface-color);
--docsearch-key-gradient: linear-gradient(-26.5deg, var(--ifm-color-emphasis-200) 0%, var(--ifm-color-emphasis-100) 100%);
}

.theme-doc-sidebar-item-category-level-1 > div > a:first-child {
color: var(--ifm-navbar-link-color);
font-size: 17px;
}

.theme-doc-sidebar-item-category-level-1 > div > a.menu__link--active {
color: var(--ifm-menu-color-active);
font-size: 17px;
}

/* Increase padding for levels greater than 1 */
[class^="theme-doc-sidebar-item"][class*="-level-"]:not(.theme-doc-sidebar-item-category-level-1) {
padding-left: 8px;
}
@@ -1,14 +0,0 @@
# Enriching Metadata in DataHub

Metadata Enrichment is a powerful way to annotate entities within DataHub, supercharging data discoverability and ensuring end-users have quick access to critical context for a given entity, such as:

* **Ownership**: who is responsible/accountable?
* **Description**: what is the intended use case? What known caveats/edge cases exist?
* **Glossary Terms**: how is it relevant to core business metrics?
* **Domain**: how is it associated with organizational domains?

This section contains detailed usage guides to help you begin enriching your data entities within DataHub.

<p align="center">
<iframe width="560" height="315" src="https://www.youtube.com/embed/xzYJ2lMJraY" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
</p>
@@ -1,23 +0,0 @@
# Get Started With DataHub

These guides are focused on helping you get up and running with DataHub as quickly as possible.

## Self-Hosted DataHub

If you're looking to build and deploy DataHub yourself, start here.

<a
className='button button--primary button--lg'
href="/docs/quickstart">
Get Started with Self-Hosted DataHub
</a>

## Managed DataHub

[Acryl Data](https://www.acryldata.io/product) provides a fully managed, premium version of DataHub.

<a
className='button button--primary button--lg'
href="/docs/authentication/guides/add-users">
Get Started with Managed DataHub
</a>
@@ -1,5 +1,14 @@
# DataHub Quickstart Guide

This guide provides instructions on deploying the open source DataHub locally.
If you're interested in a managed version, [Acryl Data](https://www.acryldata.io/product) provides a fully managed, premium version of DataHub.

<a
className='button button--primary button--lg'
href="/docs/managed-datahub/welcome-acryl">
Get Started with Managed DataHub
</a>

## Deploying DataHub

To deploy a new instance of DataHub, perform the following steps.
@@ -10,3 +10,5 @@ Sign up for fully managed, hassle-free and secure SaaS service for DataHub, prov
Sign up
</a>
</p>

Refer to [Managed Datahub Exclusives](/docs/managed-datahub/managed-datahub-overview.md) for more information.
docs/troubleshooting/build.md (new file, 42 lines)

@@ -0,0 +1,42 @@
# Build Debugging Guide

For when [Local Development](/docs/developers.md) did not work out smoothly.

## Getting `Unsupported class file major version 57`

You're probably using a Java version that's too new for Gradle. Run the following command to check your Java version:

```
java --version
```

While it may be possible to build and run DataHub using newer versions of Java, we currently only support [Java 11](https://openjdk.org/projects/jdk/11/).

## Getting `cannot find symbol` error for `javax.annotation.Generated`

Similar to the previous issue, this is usually caused by building with an unsupported Java version; please use Java 11 to build the project.
You can install multiple versions of Java on a single machine and switch between them using the `JAVA_HOME` environment variable. See [this document](https://docs.oracle.com/cd/E21454_01/html/821-2531/inst_jdk_javahome_t.html) for more details.

## `:metadata-models:generateDataTemplate` task fails with `java.nio.file.InvalidPathException: Illegal char <:> at index XX` or `Caused by: java.lang.IllegalArgumentException: 'other' has different root` error

This is a [known issue](https://github.com/linkedin/rest.li/issues/287) when building the project on Windows due to a bug in the Pegasus plugin. Please refer to [Windows Compatibility](/docs/developers.md#windows-compatibility).

## Various errors related to `generateDataTemplate` or other `generate` tasks

As we generate quite a few files from the models, it is possible that old generated files may conflict with new model changes. When this happens, a simple `./gradlew clean` should resolve the issue.

## `Execution failed for task ':metadata-service:restli-servlet-impl:checkRestModel'`

This generally means that an [incompatible change](https://linkedin.github.io/rest.li/modeling/compatibility_check) was introduced to the rest.li API in GMS. You'll need to rebuild the snapshots/IDL by running the following command once:

```
./gradlew :metadata-service:restli-servlet-impl:build -Prest.model.compatibility=ignore
```

## `java.io.IOException: No space left on device`

This means you're running out of space on your disk to build. Please free up some space or try a different disk.

## `Build failed` for task `./gradlew :datahub-frontend:dist -x yarnTest -x yarnLint`

This could mean that you need to update your [Yarn](https://yarnpkg.com/getting-started/install) version.
@@ -1,10 +1,11 @@
# FAQ Using DataHub
# General Debugging Guide

## Logo for my platform is not appearing on the Home Page or search results

Please see if either of these guides helps you:

- [Adding a custom Dataset Data Platform](./how/add-custom-data-platform.md)
- [DataHub CLI put platform command](./cli.md#put-platform)
- [Adding a custom Dataset Data Platform](../how/add-custom-data-platform.md)
- [DataHub CLI put platform command](../cli.md#put-platform)

## How do I add a dataset freshness indicator for datasets?
@@ -1,10 +1,85 @@
# Quickstart Debugging Guide

For when [Quickstart](./quickstart.md) did not work out smoothly.
For when [Quickstart](/docs/quickstart.md) did not work out smoothly.

## Common Problems

<details><summary>
Command not found: datahub
</summary>

If running the datahub CLI produces "command not found" errors inside your terminal, your system may be defaulting to an
older version of Python. Try prefixing your `datahub` commands with `python3 -m`:

```bash
python3 -m datahub docker quickstart
```

Another possibility is that your system PATH does not include pip's `$HOME/.local/bin` directory. On Linux, you can add this to your `~/.bashrc`:

```bash
if [ -d "$HOME/.local/bin" ] ; then
    PATH="$HOME/.local/bin:$PATH"
fi
```

</details>

<details>
<summary>
Port Conflicts
</summary>

By default the quickstart deploy will require the following ports to be free on your local machine:

- 3306 for MySQL
- 9200 for Elasticsearch
- 9092 for the Kafka broker
- 8081 for Schema Registry
- 2181 for ZooKeeper
- 9002 for the DataHub Web Application (datahub-frontend)
- 8080 for the DataHub Metadata Service (datahub-gms)

In case the default ports conflict with software you are already running on your machine, you can override them by passing additional flags to the `datahub docker quickstart` command.
For example, to override the MySQL port with 53306 (instead of the default 3306), run `datahub docker quickstart --mysql-port 53306`. Use `datahub docker quickstart --help` to see all the supported options.
For the metadata service container (datahub-gms), you need to use an environment variable, `DATAHUB_MAPPED_GMS_PORT`. For instance, to use port 58080, run `DATAHUB_MAPPED_GMS_PORT=58080 datahub docker quickstart`.

</details>

<details>
<summary>
no matching manifest for linux/arm64/v8 in the manifest list entries
</summary>

On Mac computers with Apple Silicon (M1, M2, etc.), you might see an error like `no matching manifest for linux/arm64/v8 in the manifest list entries`. This typically means that the datahub CLI was not able to detect that you are running it on Apple Silicon. To resolve this issue, override the default architecture detection by issuing `datahub docker quickstart --arch m1`.

</details>

<details>
<summary>
Miscellaneous Docker issues
</summary>

There can be miscellaneous issues with Docker, like conflicting containers and dangling volumes, that can often be resolved by
pruning your Docker state with the following command. Note that this command removes all unused containers, networks,
images (both dangling and unreferenced), and optionally, volumes.

```
docker system prune
```

</details>

<details>
<summary>
Still stuck?
</summary>

Hop over to our [Slack community](https://slack.datahubproject.io) and ask for help in the [#troubleshoot](https://datahubspace.slack.com/archives/C029A3M079U) channel!

</details>

## How can I confirm if all Docker containers are running as expected after a quickstart?

If you set up the `datahub` CLI tool (see [here](../metadata-ingestion/README.md)), you can use the built-in check utility:
If you set up the `datahub` CLI tool (see [here](../../metadata-ingestion/README.md)), you can use the built-in check utility:

```shell
datahub docker check
```

@@ -28,11 +103,13 @@ ce14b9758eb3 mysql:5.7
```

You can also check individual Docker container logs by running `docker logs <<container_name>>`. For `datahub-gms`, you should see a log similar to this at the end of the initialization:

```
2020-02-06 09:20:54.870:INFO:oejs.Server:main: Started @18807ms
```

For `datahub-frontend-react`, you should see a log similar to this at the end of the initialization:

```
09:20:22 [main] INFO play.core.server.AkkaHttpServer - Listening for HTTP on /0.0.0.0:9002
```

@@ -40,13 +117,14 @@ For `datahub-frontend-react`, you should see a log similar to this at the end of

## My elasticsearch or broker container exited with error or was stuck forever

If you're seeing errors like the ones below, chances are you didn't give enough resources to Docker. Please make sure to allocate at least 8GB of RAM + 2GB swap space.

```
datahub-gms | 2020/04/03 14:34:26 Problem with request: Get http://elasticsearch:9200: dial tcp 172.19.0.5:9200: connect: connection refused. Sleeping 1s
broker | [2020-04-03 14:34:42,398] INFO Client session timed out, have not heard from server in 6874ms for sessionid 0x10000023fa60002, closing socket connection and attempting reconnect (org.apache.zookeeper.ClientCnxn)
schema-registry | [2020-04-03 14:34:48,518] WARN Client session timed out, have not heard from server in 20459ms for sessionid 0x10000023fa60007 (org.apache.zookeeper.ClientCnxn)
```

## How can I check if [MXE](what/mxe.md) Kafka topics are created?
## How can I check if [MXE](../what/mxe.md) Kafka topics are created?

You can use a utility like [kafkacat](https://github.com/edenhill/kafkacat) to list all topics.
You can run the command below to see the Kafka topics created in your Kafka broker.

@@ -106,9 +184,11 @@ docker exec -it mysql /usr/bin/mysql datahub --user=datahub --password=datahub

Inspect the content of the `metadata_aspect_v2` table, which contains the ingested aspects for all entities.

## Getting error while starting Docker containers

There can be different reasons why a container fails during initialization. Below are the most common reasons:

### `bind: address already in use`

This error means that the network port (which is supposed to be used by the failed container) is already in use on your system. You need to find and kill the process which is using this specific port before starting the corresponding Docker container. If you don't want to kill the process which is using that port, another option is to change the port number for the Docker container. You need to find and change the [ports](https://docs.docker.com/compose/compose-file/#ports) parameter for the specific Docker container in the `docker-compose.yml` configuration file.

```
@@ -119,37 +199,50 @@ ERROR: for mysql Cannot start service mysql: driver failed programming external
1) sudo lsof -i :3306
2) kill -15 <PID found in step1>
```

### `OCI runtime create failed`

If you see an error message like the one below, please make sure to update your local repository to HEAD.

```
ERROR: for datahub-mae-consumer Cannot start service datahub-mae-consumer: OCI runtime create failed: container_linux.go:349: starting container process caused "exec: \"bash\": executable file not found in $PATH": unknown
```

### `failed to register layer: devmapper: Unknown device`

This most likely means that you're out of disk space (see [#1879](https://github.com/datahub-project/datahub/issues/1879)).

### `ERROR: for kafka-rest-proxy Get https://registry-1.docker.io/v2/confluentinc/cp-kafka-rest/manifests/5.4.0: EOF`

This is most likely a transient issue with [Docker Registry](https://docs.docker.com/registry/). Retry again later.

## toomanyrequests: too many failed login attempts for username or IP address

Try the following:

```bash
rm ~/.docker/config.json
docker login
```

More discussion of the same issue: https://github.com/docker/hub-feedback/issues/1250

## Seeing `Table 'datahub.metadata_aspect' doesn't exist` error when logging in

This means the database wasn't properly initialized as part of the quickstart process (see [#1816](https://github.com/datahub-project/datahub/issues/1816)). Please run the following command to manually initialize it.

```
docker exec -i mysql sh -c 'exec mysql datahub -udatahub -pdatahub' < docker/mysql/init.sql
```

## I've messed up my docker setup. How do I start from scratch?

Run the following script to remove all the containers and volumes created during the quickstart tutorial. Note that you'll also lose all the data as a result.

```
datahub docker nuke
```

## I'm seeing exceptions in DataHub GMS container like "Caused by: java.lang.IllegalStateException: Duplicate key com.linkedin.metadata.entity.ebean.EbeanAspectV2@dd26e011". What do I do?

This is related to a SQL column collation issue. The default collation we previously used (prior to Oct 26, 2021) for URN fields was case-insensitive (utf8mb4_unicode_ci). We've recently moved

@@ -159,10 +252,10 @@ to deploying with a case-sensitive collation (utf8mb4_bin) by default. In order

ALTER TABLE metadata_aspect_v2 CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
```

## I've modified the default user.props file to include a custom username and password, but I don't see the new user(s) inside the Users & Groups tab. Why not?

Currently, `user.props` is a file used by the JAAS PropertyFileLoginModule solely for the purpose of **Authentication**. The file is not used as a source from which to
ingest additional metadata about the user. For that, you'll need to ingest some custom information about your new user using the Rest.li APIs or the [File-based ingestion source](./generated/ingestion/sources/file.md).
ingest additional metadata about the user. For that, you'll need to ingest some custom information about your new user using the Rest.li APIs or the [File-based ingestion source](../generated/ingestion/sources/file.md).

For an example of a file that ingests user information, check out [single_mce.json](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/mce_files/single_mce.json), which ingests a single user object into DataHub. Notice that the "urn" field provided
will need to align with the custom username you've provided in the user.props file. For example, if your user.props file contains:

@@ -208,7 +301,7 @@ You'll need to ingest some metadata of the following form to see it inside the D

}
```

## I've configured OIDC, but I cannot login. I get continuously redirected. What do I do?

Sorry to hear that!

@@ -216,9 +309,8 @@ This phenomena may be due to the size of a Cookie DataHub uses to authenticate i

One solution is to use Play Cache to persist this session information for a user. This means the attributes about the user (and their session info) will be stored in an in-memory store in the `datahub-frontend` service, instead of a browser-side cookie.

To configure the Play Cache session store, you can set the env variable "PAC4J_SESSIONSTORE_PROVIDER" as "PlayCacheSessionStore" for the `datahub-frontend` container.

Do note that there are downsides to using the Play Cache. Specifically, it will make `datahub-frontend` a stateful server. If you have multiple instances of `datahub-frontend` deployed, you'll need to ensure that the same user is deterministically routed to the same service container (since the sessions are stored in memory). If you're using a single instance of `datahub-frontend` (the default), then things should "just work".

For more details, please refer to https://github.com/datahub-project/datahub/pull/5114
@@ -1,5 +1,11 @@
# Introduction to Metadata Ingestion

<a
className='button button--primary button--lg'
href="https://datahubproject.io/integrations">
Find Integration Source
</a>

## Integration Options

DataHub supports both **push-based** and **pull-based** metadata integration.