diff --git a/metadata-ingestion/source_docs/data_lake.md b/metadata-ingestion/source_docs/data_lake.md
index a2a702e4e7..6042ca3056 100644
--- a/metadata-ingestion/source_docs/data_lake.md
+++ b/metadata-ingestion/source_docs/data_lake.md
@@ -112,6 +112,7 @@ Note that a `.` is used to denote nested fields in the YAML recipe.
 | `profile_patterns.deny` | | | List of regex patterns for tables to not profile (a must also be ingested for profiling). Defaults to none. |
 | `profile_patterns.ignoreCase` | | `True` | Whether to ignore case sensitivity during pattern matching of tables to profile. |
 | `profiling.enabled` | | `False` | Whether profiling should be done. |
+| `profiling.spark_cluster_manager` | | `None` | Spark master URL. See [Spark docs](https://spark.apache.org/docs/latest/submitting-applications.html#master-urls) for details. |
 | `profiling.profile_table_level_only` | | `False` | Whether to perform profiling at table-level only or include column-level profiling as well. |
 | `profiling.max_number_of_fields_to_profile` | | `None` | A positive integer that specifies the maximum number of columns to profile for any table. `None` implies all columns. The cost of profiling goes up significantly as the number of columns to profile goes up. |
 | `profiling.include_field_null_count` | | `True` | Whether to profile for the number of nulls for each column. |
diff --git a/metadata-ingestion/src/datahub/ingestion/source/data_lake/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/data_lake/__init__.py
index ee3dabbf34..259b36889c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/data_lake/__init__.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/data_lake/__init__.py
@@ -179,6 +179,10 @@ class DataLakeSource(Source):
 
         conf = SparkConf()
 
+        # None by default, which corresponds to local
+        if self.source_config.profiling.spark_cluster_manager:
+            conf.setMaster(self.source_config.profiling.spark_cluster_manager)
+
         conf.set(
             "spark.jars.packages",
             ",".join(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/data_lake/profiling.py b/metadata-ingestion/src/datahub/ingestion/source/data_lake/profiling.py
index 3c31e2c4df..845fe8f1cb 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/data_lake/profiling.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/data_lake/profiling.py
@@ -62,6 +62,7 @@ def null_str(value: Any) -> Optional[str]:
 
 class DataLakeProfilerConfig(ConfigModel):
     enabled: bool = False
+    spark_cluster_manager: Optional[str] = None
 
     # These settings will override the ones below.
     profile_table_level_only: bool = False
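
For context, below is a minimal recipe sketch showing how the new `profiling.spark_cluster_manager` option would be set. The source type, `base_path`, and the master URL are illustrative placeholders, not values taken from this change; any master URL accepted by the linked Spark docs should work, and omitting the option keeps the default `None` (local execution).

```yml
# Hypothetical data-lake ingestion recipe; base_path and the master URL are placeholders.
source:
  type: data-lake
  config:
    env: PROD
    base_path: /path/to/data
    profiling:
      enabled: true
      # New option added by this change; leave it unset to run Spark locally.
      spark_cluster_manager: spark://my-spark-master:7077

sink:
  type: datahub-rest
  config:
    server: http://localhost:8080
```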