datahub/datahub-frontend/conf/application.conf
# This is the main configuration file for the application.
# ~~~~~
# Secret key
# ~~~~~
# The secret key is used to secure cryptographic functions.
#
# This must be changed for production, but we recommend not changing it in this file.
#
# See http://www.playframework.com/documentation/latest/ApplicationSecret for more details.
play.http.secret.key = ${DATAHUB_SECRET}
# The application languages
# ~~~~~
play.i18n.langs = ["en"]
play.application.loader = play.inject.guice.GuiceApplicationLoader
# Play http parser settings
# ~~~~~
# Increase default buffer size to handle large POST requests
play.http.parser.maxMemoryBuffer = 10MB
play.http.parser.maxMemoryBuffer = ${?DATAHUB_PLAY_MEM_BUFFER_SIZE}
play.modules.disabled += "play.api.mvc.LegacyCookiesModule"
play.modules.disabled += "play.api.mvc.CookiesModule"
play.modules.enabled += "auth.cookie.CustomCookiesModule"
play.modules.enabled += "auth.AuthModule"
jwt {
# 'alg' https://tools.ietf.org/html/rfc7515#section-4.1.1
signatureAlgorithm = "HS256"
}
# We override the Akka server provider to allow raising the maximum header count.
# This is useful when proxies such as Envoy add headers, which would otherwise cause the
# frontend server to reject GMS responses that exceed the default limit of 64 headers.
play.server.provider = server.CustomAkkaHttpServerProvider
play.http.server.akka.max-header-count = 64
play.http.server.akka.max-header-count = ${?DATAHUB_AKKA_MAX_HEADER_COUNT}
# max-header-size is reportedly no longer used
play.server.akka.max-header-size = 32k
play.server.akka.max-header-size = ${?DATAHUB_AKKA_MAX_HEADER_VALUE_LENGTH}
# max header value length seems to impact the actual limit
play.server.akka.max-header-value-length = 32k
play.server.akka.max-header-value-length = ${?DATAHUB_AKKA_MAX_HEADER_VALUE_LENGTH}
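# Example (illustrative values): when running behind a proxy such as Envoy that adds many
# headers, the limits above can be raised, for instance:
# play.http.server.akka.max-header-count = 128
# play.server.akka.max-header-value-length = 64k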
# Update AUTH_COOKIE_SAME_SITE and AUTH_COOKIE_SECURE in order to change how authentication cookies
# are configured. If you wish cookies to be sent in first and third party contexts, set
# AUTH_COOKIE_SAME_SITE = "NONE" and AUTH_COOKIE_SECURE = true. If AUTH_COOKIE_SAME_SITE is "NONE",
# AUTH_COOKIE_SECURE must be set to true.
play.http.session.sameSite = "LAX"
play.http.session.sameSite = ${?AUTH_COOKIE_SAME_SITE}
play.http.session.secure = false
play.http.session.secure = ${?AUTH_COOKIE_SECURE}
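# Example (illustrative): to allow the authentication cookie to be sent in third-party
# contexts (e.g. when DataHub is embedded in another site), set both values together:
# play.http.session.sameSite = "NONE"
# play.http.session.secure = true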
play.filters {
enabled = [
play.filters.gzip.GzipFilter
]
gzip {
contentType {
# If non-empty, a response will only be compressed if its content type is in this list.
whiteList = [ "text/*", "application/javascript", "application/json" ]
# The black list is only used if the white list is empty.
# Compress all responses except the ones whose content type is in this list.
blackList = []
}
}
}
# pac4j configuration
# default to PlayCookieSessionStore to keep datahub-frontend's statelessness
pac4j.sessionStore.provider = "PlayCookieSessionStore"
pac4j.sessionStore.provider = ${?PAC4J_SESSIONSTORE_PROVIDER}
# Database configuration
# ~~~~~
# You can declare as many datasources as you want.
# By convention, the default datasource is named `default`
#
# db.default.driver=org.h2.Driver
# db.default.url="jdbc:h2:mem:play"
# db.default.username=sa
# db.default.password=""
# Evolutions
# ~~~~~
# You can disable evolutions if needed
# play.evolutions.enabled=false
# You can disable evolutions for a specific datasource if necessary
# play.evolutions.db.default.enabled=false
app.version = 0.0.0
app.version = ${?DATAHUB_APP_VERSION}
dataset.hdfs_browser.link = ""
linkedin.internal = false
linkedin.show.dataset.lineage = true
linkedin.suggestion.confidence.threshold = "50"
tracking.piwik.siteid = "0"
tracking.piwik.siteid = ${?DATAHUB_PIWIK_SITEID}
tracking.piwik.url = ""
tracking.piwik.url = ${?DATAHUB_PIWIK_URL}
linkedin.links.avi.urlPrimary = ""
linkedin.links.avi.urlFallback = ""
links.wiki.appHelp = "https://github.com/datahub-project/datahub"
links.wiki.gdprPii = ""
links.wiki.tmsSchema = ""
links.wiki.gdprTaxonomy = ""
links.wiki.staleSearchIndex = ""
links.wiki.dht = ""
links.wiki.purgePolicies = ""
links.wiki.jitAcl = ""
links.wiki.metadataCustomRegex = ""
links.wiki.complianceOwner = ""
links.wiki.exportPolicy = ""
links.wiki.metadataHealth = ""
links.wiki.purgeKey = ""
links.wiki.datasetDecommission = ""
ui.show.ownership.revamp = true
ui.show.staging.banner = false
ui.show.people = true
ui.show.CM.banner = false
ui.show.CM.link = ""
ui.show.stale.search = true
ui.show.live.data.banner = false
ui.show.lineage.graph = true
ui.show.advanced.search = true
ui.show.institutional.memory = true
ui.new.browse.dataset = true
# React App Authentication
# ~~~~~
#
# Enable verbose authentication logging
#
auth.verbose.logging = false
auth.verbose.logging = ${?AUTH_VERBOSE_LOGGING}
# The React app currently supports OIDC SSO and self-configured JAAS for authentication. Below you can find
# the supported configurations for each mechanism.
#
# Required OIDC Configuration Values:
#
auth.oidc.enabled = ${?AUTH_OIDC_ENABLED} # Enable OIDC authentication, disabled by default
auth.oidc.clientId = ${?AUTH_OIDC_CLIENT_ID} # Unique client id issued by the identity provider
auth.oidc.clientSecret = ${?AUTH_OIDC_CLIENT_SECRET} # Unique client secret issued by the identity provider.
auth.oidc.discoveryUri = ${?AUTH_OIDC_DISCOVERY_URI} # The IdP OIDC discovery url.
auth.baseUrl = ${?AUTH_OIDC_BASE_URL} # The base URL associated with your DataHub deployment.
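# Example (illustrative values, not defaults): a minimal OIDC configuration for a local
# deployment might look like the following. The discovery URI path follows the standard
# OIDC ".well-known" convention and will vary by identity provider:
# auth.oidc.enabled = true
# auth.oidc.clientId = "<client-id-issued-by-your-idp>"
# auth.oidc.clientSecret = "<client-secret-issued-by-your-idp>"
# auth.oidc.discoveryUri = "https://idp.example.com/.well-known/openid-configuration"
# auth.baseUrl = "http://localhost:9002"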
#
# Optional OIDC Configuration Values:
#
auth.oidc.userNameClaim = ${?AUTH_OIDC_USER_NAME_CLAIM} # The attribute / claim used to derive the DataHub username. Defaults to "preferred_username".
auth.oidc.userNameClaimRegex = ${?AUTH_OIDC_USER_NAME_CLAIM_REGEX} # The regex used to parse the DataHub username from the user name claim. Defaults to (.*) (all)
auth.oidc.scope = ${?AUTH_OIDC_SCOPE} # String representing the requested scope from the IdP. Defaults to "oidc email profile"
auth.oidc.clientAuthenticationMethod = ${?AUTH_OIDC_CLIENT_AUTHENTICATION_METHOD} # Which authentication method to use to pass credentials (clientId and clientSecret) to the token endpoint: Defaults to "client_secret_basic"
auth.oidc.jitProvisioningEnabled = ${?AUTH_OIDC_JIT_PROVISIONING_ENABLED} # Whether DataHub users should be provisioned on login if they do not exist. Defaults to true.
auth.oidc.preProvisioningRequired = ${?AUTH_OIDC_PRE_PROVISIONING_REQUIRED} # Whether the user should already exist in DataHub on login, failing login if they are not. Defaults to false.
auth.oidc.extractGroupsEnabled = ${?AUTH_OIDC_EXTRACT_GROUPS_ENABLED} # Whether groups should be extracted from a claim in the OIDC profile. Only applies if JIT provisioning is enabled. Groups will be created if they do not exist. Defaults to true.
auth.oidc.groupsClaim = ${?AUTH_OIDC_GROUPS_CLAIM} # The OIDC claim to extract groups information from. Defaults to 'groups'
auth.oidc.responseType = ${?AUTH_OIDC_RESPONSE_TYPE}
auth.oidc.responseMode = ${?AUTH_OIDC_RESPONSE_MODE}
auth.oidc.useNonce = ${?AUTH_OIDC_USE_NONCE}
auth.oidc.customParam.resource = ${?AUTH_OIDC_CUSTOM_PARAM_RESOURCE}
auth.oidc.readTimeout = ${?AUTH_OIDC_READ_TIMEOUT}
auth.oidc.connectTimeout = ${?AUTH_OIDC_CONNECT_TIMEOUT}
auth.oidc.extractJwtAccessTokenClaims = ${?AUTH_OIDC_EXTRACT_JWT_ACCESS_TOKEN_CLAIMS} # Whether to extract claims from JWT access token. Defaults to false.
auth.oidc.preferredJwsAlgorithm = ${?AUTH_OIDC_PREFERRED_JWS_ALGORITHM} # Which jws algorithm to use
auth.oidc.acrValues = ${?AUTH_OIDC_ACR_VALUES}
auth.oidc.grantType = ${?AUTH_OIDC_GRANT_TYPE}
#
# By default, the callback URL that should be registered with the identity provider is computed as ${baseUrl}/callback/oidc.
# For example, the default callback URL for a local deployment of DataHub would be "http://localhost:9002/callback/oidc".
# This callback URL should be registered with the identity provider during configuration.
#
#
# JAAS Optional Configuration Values
#
# In addition to OIDC, JAAS authentication with a dummy login module is enabled by default. Currently, this accepts
# any username / password combination as valid credentials. To disable this entry point altogether, specify the following config:
#
auth.jaas.enabled = ${?AUTH_JAAS_ENABLED}
auth.native.enabled = ${?AUTH_NATIVE_ENABLED}
auth.guest.enabled = ${?GUEST_AUTHENTICATION_ENABLED}
# The user id used for the guest account
auth.guest.user = ${?GUEST_AUTHENTICATION_USER}
# The path that bypasses the login page and logs the visitor in as the guest user
auth.guest.path = ${?GUEST_AUTHENTICATION_PATH}
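# Example (illustrative values): guest access might be enabled as follows; the user id and
# path shown here are placeholders, not documented defaults:
# auth.guest.enabled = true
# auth.guest.user = "guest"
# auth.guest.path = "/public"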
# Enforces the usage of a valid email for user sign up
auth.native.signUp.enforceValidEmail = true
auth.native.signUp.enforceValidEmail = ${?ENFORCE_VALID_EMAIL}
#
# To disable all authentication to the app and proxy all users through a master "datahub" account, make sure
# that JAAS, native, and OIDC auth are all disabled:
#
# auth.jaas.enabled = false
# auth.native.enabled = false
# auth.oidc.enabled = false # (or simply omit oidc configurations)
# Login session expiration time; controls when the actor cookie expires on the browser side
auth.session.ttlInHours = 24
auth.session.ttlInHours = ${?AUTH_SESSION_TTL_HOURS}
# Control the length of time a session token is valid
play.http.session.maxAge = 24h
play.http.session.maxAge = ${?MAX_SESSION_TOKEN_AGE}
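# Example (illustrative): to extend login sessions to one week, keep both settings in sync:
# auth.session.ttlInHours = 168
# play.http.session.maxAge = 168h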
analytics.enabled = true
analytics.enabled = ${?DATAHUB_ANALYTICS_ENABLED}
# Kafka Producer Configuration
analytics.kafka.bootstrap.server = ${KAFKA_BOOTSTRAP_SERVER}
analytics.tracking.topic = DataHubUsageEvent_v1
analytics.tracking.topic = ${?DATAHUB_TRACKING_TOPIC}
analytics.kafka.delivery.timeout.ms = 30000
analytics.kafka.delivery.timeout.ms = ${?KAFKA_PROPERTIES_DELIVERY_TIMEOUT_MS}
analytics.kafka.request.timeout.ms = 3000
analytics.kafka.request.timeout.ms = ${?KAFKA_PROPERTIES_REQUEST_TIMEOUT_MS}
# Kafka Producer SSL Configs. All must be provided to enable SSL.
analytics.kafka.security.protocol = ${?KAFKA_PROPERTIES_SECURITY_PROTOCOL}
analytics.kafka.ssl.key.password = ${?KAFKA_PROPERTIES_SSL_KEY_PASSWORD}
analytics.kafka.ssl.keystore.type = ${?KAFKA_PROPERTIES_SSL_KEYSTORE_TYPE}
analytics.kafka.ssl.keystore.location = ${?KAFKA_PROPERTIES_SSL_KEYSTORE_LOCATION}
analytics.kafka.ssl.keystore.password = ${?KAFKA_PROPERTIES_SSL_KEYSTORE_PASSWORD}
analytics.kafka.ssl.truststore.type = ${?KAFKA_PROPERTIES_SSL_TRUSTSTORE_TYPE}
analytics.kafka.ssl.truststore.location = ${?KAFKA_PROPERTIES_SSL_TRUSTSTORE_LOCATION}
analytics.kafka.ssl.truststore.password = ${?KAFKA_PROPERTIES_SSL_TRUSTSTORE_PASSWORD}
analytics.kafka.ssl.protocol = ${?KAFKA_PROPERTIES_SSL_PROTOCOL}
analytics.kafka.ssl.endpoint.identification.algorithm = ${?KAFKA_PROPERTIES_SSL_ENDPOINT_IDENTIFICATION_ALGORITHM}
# If analytics.kafka.security.protocol is set to "SASL_SSL", at least the SASL mechanism and JAAS config below must be set.
analytics.kafka.sasl.mechanism = ${?KAFKA_PROPERTIES_SASL_MECHANISM}
analytics.kafka.sasl.jaas.config = ${?KAFKA_PROPERTIES_SASL_JAAS_CONFIG}
analytics.kafka.sasl.kerberos.service.name = ${?KAFKA_PROPERTIES_SASL_KERBEROS_SERVICE_NAME}
analytics.kafka.sasl.login.callback.handler.class = ${?KAFKA_PROPERTIES_SASL_LOGIN_CALLBACK_HANDLER_CLASS}
analytics.kafka.sasl.client.callback.handler.class = ${?KAFKA_PROPERTIES_SASL_CLIENT_CALLBACK_HANDLER_CLASS}
analytics.kafka.sasl.oauthbearer.token.endpoint.url = ${?KAFKA_PROPERTIES_SASL_OAUTHBEARER_TOKEN_ENDPOINT_URL}
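# Example (illustrative): a SASL_SSL cluster using the PLAIN mechanism might be configured as:
# analytics.kafka.security.protocol = "SASL_SSL"
# analytics.kafka.sasl.mechanism = "PLAIN"
# analytics.kafka.sasl.jaas.config = "org.apache.kafka.common.security.plain.PlainLoginModule required username=\"<user>\" password=\"<password>\";"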
# Required Elastic Client Configuration
analytics.elastic.host = ${?ELASTIC_CLIENT_HOST}
analytics.elastic.port = ${?ELASTIC_CLIENT_PORT}
# Optional Elastic Client Configurations
analytics.elastic.threadCount = ${?ELASTIC_CLIENT_THREAD_COUNT}
analytics.elastic.connectionRequestTimeout = ${?ELASTIC_CLIENT_CONNECTION_REQUEST_TIMEOUT}
# Elastic Client SSL Configs. Required if analytics.elastic.useSSL is true.
analytics.elastic.useSSL = ${?ELASTIC_CLIENT_USE_SSL}
analytics.elastic.sslContext.sslProtocol = ${?ELASTIC_CLIENT_SSL_PROTOCOL}
analytics.elastic.sslContext.sslSecureRandomImplementation = ${?ELASTIC_CLIENT_SSL_SECURE_RANDOM_IMPLEMENTATION}
analytics.elastic.sslContext.sslTrustStoreFile = ${?ELASTIC_CLIENT_SSL_TRUST_STORE_FILE}
analytics.elastic.sslContext.sslTrustStoreType = ${?ELASTIC_CLIENT_SSL_TRUST_STORE_TYPE}
analytics.elastic.sslContext.sslTrustStorePassword = ${?ELASTIC_CLIENT_SSL_TRUST_STORE_PASSWORD}
analytics.elastic.sslContext.sslKeyStoreFile = ${?ELASTIC_CLIENT_SSL_KEY_STORE_FILE}
analytics.elastic.sslContext.sslKeyStoreType = ${?ELASTIC_CLIENT_SSL_KEY_STORE_TYPE}
analytics.elastic.sslContext.sslKeyStorePassword = ${?ELASTIC_CLIENT_SSL_KEY_STORE_PASSWORD}
# Optional username + password for use with SSL
analytics.elastic.username = ${?ELASTIC_CLIENT_USERNAME}
analytics.elastic.password = ${?ELASTIC_CLIENT_PASSWORD}
analytics.elastic.indexPrefix = ${?ELASTIC_INDEX_PREFIX}
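# Example (illustrative values): analytics backed by a TLS-enabled Elasticsearch cluster with
# basic auth might be configured as:
# analytics.elastic.host = "elasticsearch.internal.example.com"
# analytics.elastic.port = 9200
# analytics.elastic.useSSL = true
# analytics.elastic.username = "datahub_analytics"
# analytics.elastic.password = "<password>"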
# Metadata Service Configs
metadataService.host=${?DATAHUB_GMS_HOST}
metadataService.port=${?DATAHUB_GMS_PORT}
metadataService.useSsl=${?DATAHUB_GMS_USE_SSL} # Internal SSL is not fully supported yet.
# Set to "true" to enable Metadata Service Authentication. False BY DEFAULT.
metadataService.auth.enabled=${?METADATA_SERVICE_AUTH_ENABLED}
# Required when metadataService.auth.enabled is "true". Provides a secure identifier for datahub-frontend that the
# Metadata Service trusts for generating Access Tokens on behalf of a particular user.
# This MUST match the configurations of the same name for the Metadata Service.
systemClientId = __datahub_system # Change this to something random.
systemClientSecret = JohnSnowKnowsNothing # Along with this.
systemClientId=${?DATAHUB_SYSTEM_CLIENT_ID}
systemClientSecret=${?DATAHUB_SYSTEM_CLIENT_SECRET}
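# Example (illustrative): with Metadata Service Authentication turned on, the client id and
# secret here must match the values configured on the Metadata Service (GMS):
# metadataService.auth.enabled = true
# systemClientId = "<shared-client-id>"
# systemClientSecret = "<shared-random-secret>"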
entityClient.retryInterval = 2
entityClient.retryInterval = ${?ENTITY_CLIENT_RETRY_INTERVAL}
entityClient.numRetries = 3
entityClient.numRetries = ${?ENTITY_CLIENT_NUM_RETRIES}
entityClient.restli.get.batchSize = 50
entityClient.restli.get.batchSize = ${?ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE}
entityClient.restli.get.batchConcurrency = 2
entityClient.restli.get.batchConcurrency = ${?ENTITY_CLIENT_RESTLI_GET_BATCH_CONCURRENCY}
# Enable verbose GraphQL query logging
graphql.verbose.logging = false
graphql.verbose.logging = ${?GRAPHQL_VERBOSE_LOGGING}
graphql.verbose.slowQueryMillis = 2500
graphql.verbose.slowQueryMillis = ${?GRAPHQL_VERBOSE_LONG_QUERY_MILLIS}