diff --git a/docs/_src/api/api/preprocessor.md b/docs/_src/api/api/preprocessor.md
index c92599f6a..bc9888d28 100644
--- a/docs/_src/api/api/preprocessor.md
+++ b/docs/_src/api/api/preprocessor.md
@@ -37,7 +37,7 @@ class PreProcessor(BasePreProcessor)
 #### process
 
 ```python
-def process(documents: Union[dict, List[dict]], clean_whitespace: Optional[bool] = None, clean_header_footer: Optional[bool] = None, clean_empty_lines: Optional[bool] = None, split_by: Optional[str] = None, split_length: Optional[int] = None, split_overlap: Optional[int] = None, split_respect_sentence_boundary: Optional[bool] = None) -> List[dict]
+def process(documents: Union[dict, List[dict]], clean_whitespace: Optional[bool] = None, clean_header_footer: Optional[bool] = None, clean_empty_lines: Optional[bool] = None, remove_substrings: List[str] = [], split_by: Optional[str] = None, split_length: Optional[int] = None, split_overlap: Optional[int] = None, split_respect_sentence_boundary: Optional[bool] = None) -> List[dict]
 ```
 
 Perform document cleaning and splitting. Can take a single document or a list of documents as input and returns a list of documents.
@@ -47,7 +47,7 @@ Perform document cleaning and splitting. Can take a single document or a list of
 #### clean
 
 ```python
-def clean(document: dict, clean_whitespace: bool, clean_header_footer: bool, clean_empty_lines: bool) -> dict
+def clean(document: dict, clean_whitespace: bool, clean_header_footer: bool, clean_empty_lines: bool, remove_substrings: List[str]) -> dict
 ```
 
 Perform document cleaning on a single document and return a single document. This method will deal with whitespaces, headers, footers
diff --git a/haystack/nodes/preprocessor/preprocessor.py b/haystack/nodes/preprocessor/preprocessor.py
index 346227782..57229b64a 100644
--- a/haystack/nodes/preprocessor/preprocessor.py
+++ b/haystack/nodes/preprocessor/preprocessor.py
@@ -43,6 +43,7 @@ class PreProcessor(BasePreProcessor):
         clean_whitespace: bool = True,
         clean_header_footer: bool = False,
         clean_empty_lines: bool = True,
+        remove_substrings: List[str] = [],
         split_by: str = "word",
         split_length: int = 200,
         split_overlap: int = 0,
@@ -56,6 +57,7 @@ class PreProcessor(BasePreProcessor):
                                      or similar.
         :param clean_whitespace: Strip whitespaces before or after each line in the text.
         :param clean_empty_lines: Remove more than two empty lines in the text.
+        :param remove_substrings: List of substrings to remove from the text. Each occurrence is matched exactly (case-sensitive); an empty list removes nothing.
         :param split_by: Unit for splitting the document. Can be "word", "sentence", or "passage". Set to None to disable splitting.
         :param split_length: Max. number of the above split unit (e.g. words) that are allowed in one document. For instance, if n -> 10 & split_by ->
                            "sentence", then each output document will have 10 sentences.
@@ -76,6 +78,7 @@ class PreProcessor(BasePreProcessor):
             clean_whitespace=clean_whitespace,
             clean_header_footer=clean_header_footer,
             clean_empty_lines=clean_empty_lines,
+            remove_substrings=remove_substrings,
             split_by=split_by,
             split_length=split_length,
             split_overlap=split_overlap,
@@ -90,6 +93,7 @@ class PreProcessor(BasePreProcessor):
         self.clean_whitespace = clean_whitespace
         self.clean_header_footer = clean_header_footer
         self.clean_empty_lines = clean_empty_lines
+        self.remove_substrings = remove_substrings
         self.split_by = split_by
         self.split_length = split_length
         self.split_overlap = split_overlap
@@ -103,6 +107,7 @@ class PreProcessor(BasePreProcessor):
         clean_whitespace: Optional[bool] = None,
         clean_header_footer: Optional[bool] = None,
         clean_empty_lines: Optional[bool] = None,
+        remove_substrings: List[str] = [],
         split_by: Optional[str] = None,
         split_length: Optional[int] = None,
         split_overlap: Optional[int] = None,
@@ -117,6 +122,7 @@ class PreProcessor(BasePreProcessor):
             "clean_whitespace": clean_whitespace,
             "clean_header_footer": clean_header_footer,
             "clean_empty_lines": clean_empty_lines,
+            "remove_substrings": remove_substrings,
             "split_by": split_by,
             "split_length": split_length,
             "split_overlap": split_overlap,
@@ -141,6 +147,7 @@ class PreProcessor(BasePreProcessor):
         clean_whitespace: Optional[bool] = None,
         clean_header_footer: Optional[bool] = None,
         clean_empty_lines: Optional[bool] = None,
+        remove_substrings: List[str] = [],
         split_by: Optional[str] = None,
         split_length: Optional[int] = None,
         split_overlap: Optional[int] = None,
@@ -153,6 +160,8 @@ class PreProcessor(BasePreProcessor):
             clean_header_footer = self.clean_header_footer
         if clean_empty_lines is None:
             clean_empty_lines = self.clean_empty_lines
+        if not remove_substrings:
+            remove_substrings = self.remove_substrings
         if split_by is None:
             split_by = self.split_by
         if split_length is None:
@@ -167,6 +176,7 @@ class PreProcessor(BasePreProcessor):
             clean_whitespace=clean_whitespace,
             clean_header_footer=clean_header_footer,
             clean_empty_lines=clean_empty_lines,
+            remove_substrings=remove_substrings,
         )
         split_documents = self.split(
             document=cleaned_document,
@@ -181,7 +191,14 @@ class PreProcessor(BasePreProcessor):
         nested_docs = [self._process_single(d, **kwargs) for d in tqdm(documents, unit="docs")]
         return [d for x in nested_docs for d in x]
 
-    def clean(self, document: dict, clean_whitespace: bool, clean_header_footer: bool, clean_empty_lines: bool) -> dict:
+    def clean(
+        self,
+        document: dict,
+        clean_whitespace: bool,
+        clean_header_footer: bool,
+        clean_empty_lines: bool,
+        remove_substrings: List[str],
+    ) -> dict:
         """
         Perform document cleaning on a single document and return a single document. This method will deal with whitespaces, headers, footers
         and empty lines. Its exact functionality is defined by the parameters passed into PreProcessor.__init__().
@@ -204,6 +221,9 @@ class PreProcessor(BasePreProcessor):
         if clean_empty_lines:
             text = re.sub(r"\n\n+", "\n\n", text)
 
+        for substring in remove_substrings:
+            text = text.replace(substring, "")
+
         document["content"] = text
         return document
 
diff --git a/json-schemas/haystack-pipeline-1.1.0.schema.json b/json-schemas/haystack-pipeline-1.1.0.schema.json
index 6352802fc..088561ddc 100644
--- a/json-schemas/haystack-pipeline-1.1.0.schema.json
+++ b/json-schemas/haystack-pipeline-1.1.0.schema.json
@@ -1381,6 +1381,14 @@
               "default": true,
               "type": "boolean"
             },
+            "remove_substrings": {
+              "title": "Remove Substrings",
+              "default": [],
+              "type": "array",
+              "items": {
+                "type": "string"
+              }
+            },
             "split_by": {
               "title": "Split By",
               "default": "word",
diff --git a/json-schemas/haystack-pipeline-1.2.1rc0.schema.json b/json-schemas/haystack-pipeline-1.2.1rc0.schema.json
index 6312ed505..45353fcc7 100644
--- a/json-schemas/haystack-pipeline-1.2.1rc0.schema.json
+++ b/json-schemas/haystack-pipeline-1.2.1rc0.schema.json
@@ -1432,6 +1432,14 @@
               "default": true,
               "type": "boolean"
             },
+            "remove_substrings": {
+              "title": "Remove Substrings",
+              "default": [],
+              "type": "array",
+              "items": {
+                "type": "string"
+              }
+            },
             "split_by": {
               "title": "Split By",
               "default": "word",
diff --git a/test/test_preprocessor.py b/test/test_preprocessor.py
index a3f8c6c05..2e56a9c7e 100644
--- a/test/test_preprocessor.py
+++ b/test/test_preprocessor.py
@@ -88,3 +88,23 @@ def test_clean_header_footer():
 
     assert "This is a header." not in documents[0]["content"]
     assert "footer" not in documents[0]["content"]
+
+
+def test_remove_substrings():
+    document = Document("This is a header. Some additional text. wiki. Some emoji ✨ 🪲 Weird whitespace\b\b\b.")
+
+    # sanity check: the input contains both the substrings we are about to remove and the ones that must survive
+    assert "This is a header." in document["content"]
+    assert "wiki" in document["content"]
+    assert "🪲" in document["content"]
+    assert "whitespace" in document["content"]
+    assert "✨" in document["content"]
+
+    preprocessor = PreProcessor(remove_substrings=["This is a header.", "wiki", "🪲"])
+    documents = preprocessor.process(document)
+    # verify the returned documents (like test_clean_header_footer) instead of relying on in-place mutation of the input
+    assert "This is a header." not in documents[0]["content"]
+    assert "wiki" not in documents[0]["content"]
+    assert "🪲" not in documents[0]["content"]
+    assert "whitespace" in documents[0]["content"]
+    assert "✨" in documents[0]["content"]