diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1d9d38904..9386d9711 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,13 @@
+## 0.6.8
+
+### Enhancements
+
+### Features
+
+* Add `partition_csv` for CSV files.
+
+### Fixes
+
## 0.6.7
### Enhancements
diff --git a/README.md b/README.md
index 8fa558c94..756d80c79 100644
--- a/README.md
+++ b/README.md
@@ -184,7 +184,7 @@ You can run this [Colab notebook](https://colab.research.google.com/drive/1U8VCj
The following examples show how to get started with the `unstructured` library.
You can parse **TXT**, **HTML**, **XML**, **PDF**, **EML**, **MSG**, **RTF**, **EPUB**, **DOC**, **DOCX**,
-**XLSX**, **ODT**, **PPT**, **PPTX**, **JPG**,
+**XLSX**, **CSV**, **ODT**, **PPT**, **PPTX**, **JPG**,
and **PNG** documents with one line of code!
See our [documentation page](https://unstructured-io.github.io/unstructured) for a full description
diff --git a/docs/source/bricks.rst b/docs/source/bricks.rst
index 496c66020..56cd6bd7e 100644
--- a/docs/source/bricks.rst
+++ b/docs/source/bricks.rst
@@ -83,7 +83,7 @@ If you call the ``partition`` function, ``unstructured`` will attempt to detect
file type and route it to the appropriate partitioning brick. All partitioning bricks
called within ``partition`` are called using the default kwargs. Use the document-type
specific bricks if you need to apply non-default settings.
-``partition`` currently supports ``.docx``, ``.doc``, ``.odt``, ``.pptx``, ``.ppt``, ``.xlsx``, ``.eml``, ``.msg``, ``.rtf``, ``.epub``, ``.html``, ``.xml``, ``.pdf``,
+``partition`` currently supports ``.docx``, ``.doc``, ``.odt``, ``.pptx``, ``.ppt``, ``.xlsx``, ``.csv``, ``.eml``, ``.msg``, ``.rtf``, ``.epub``, ``.html``, ``.xml``, ``.pdf``,
``.png``, ``.jpg``, and ``.txt`` files.
If you set the ``include_page_breaks`` kwarg to ``True``, the output will include page breaks. This is only supported for ``.pptx``, ``.html``, ``.pdf``,
``.png``, and ``.jpg``.
@@ -269,6 +269,23 @@ Examples:
print(elements[0].metadata.text_as_html)
+``partition_csv``
+------------------
+
+The ``partition_csv`` function pre-processes CSV files. The output is a single
+``Table`` element. The ``text_as_html`` attribute in the element metadata will
+contain an HTML representation of the table.
+
+Examples:
+
+.. code:: python
+
+ from unstructured.partition.csv import partition_csv
+
+ elements = partition_csv(filename="example-docs/stanley-cups.csv")
+ print(elements[0].metadata.text_as_html)
+
+
``partition_odt``
------------------
diff --git a/example-docs/stanley-cups.csv b/example-docs/stanley-cups.csv
new file mode 100644
index 000000000..4414023f0
--- /dev/null
+++ b/example-docs/stanley-cups.csv
@@ -0,0 +1,5 @@
+Stanley Cups,,
+Team,Location,Stanley Cups
+Blues,STL,1
+Flyers,PHI,2
+Maple Leafs,TOR,13
\ No newline at end of file
diff --git a/test_unstructured/file_utils/test_filetype.py b/test_unstructured/file_utils/test_filetype.py
index 76c6ced3d..16169abfa 100644
--- a/test_unstructured/file_utils/test_filetype.py
+++ b/test_unstructured/file_utils/test_filetype.py
@@ -36,6 +36,7 @@ XLSX_MIME_TYPES = [
("example-10k.html", FileType.HTML),
("fake-html.html", FileType.HTML),
("stanley-cups.xlsx", FileType.XLSX),
+ ("stanley-cups.csv", FileType.CSV),
("fake-power-point.pptx", FileType.PPTX),
("winter-sports.epub", FileType.EPUB),
("spring-weather.html.json", FileType.JSON),
@@ -59,6 +60,7 @@ def test_detect_filetype_from_filename(file, expected):
("example-10k.html", FileType.HTML),
("fake-html.html", FileType.HTML),
("stanley-cups.xlsx", FileType.XLSX),
+ ("stanley-cups.csv", FileType.CSV),
("fake-power-point.pptx", FileType.PPTX),
("winter-sports.epub", FileType.EPUB),
("fake-doc.rtf", FileType.RTF),
@@ -94,6 +96,7 @@ def test_detect_filetype_from_filename_with_extension(monkeypatch, file, expecte
("example-10k.html", [FileType.HTML, FileType.XML]),
("fake-html.html", FileType.HTML),
("stanley-cups.xlsx", FileType.XLSX),
+ ("stanley-cups.csv", FileType.CSV),
("fake-power-point.pptx", FileType.PPTX),
("winter-sports.epub", FileType.EPUB),
],
diff --git a/test_unstructured/partition/test_auto.py b/test_unstructured/partition/test_auto.py
index 7174f42a9..398351911 100644
--- a/test_unstructured/partition/test_auto.py
+++ b/test_unstructured/partition/test_auto.py
@@ -693,3 +693,21 @@ def test_auto_partition_xlsx_from_file(filename="example-docs/stanley-cups.xlsx"
assert elements[0].metadata.text_as_html == EXPECTED_XLSX_TABLE
assert elements[0].metadata.page_number == 1
assert elements[0].metadata.filetype == EXPECTED_XLSX_FILETYPE
+
+
+def test_auto_partition_csv_from_filename(filename="example-docs/stanley-cups.csv"):
+    elements = partition(filename=filename)  # file type is detected from the path
+    assert isinstance(elements[0], Table)  # same type check as the from_file variant
+    assert clean_extra_whitespace(elements[0].text) == EXPECTED_XLSX_TEXT
+    assert elements[0].metadata.text_as_html == EXPECTED_XLSX_TABLE
+    assert elements[0].metadata.filetype == "text/csv"
+
+
+def test_auto_partition_csv_from_file(filename="example-docs/stanley-cups.csv"):
+    """Partition the CSV through a binary file handle and check the first element."""
+    with open(filename, "rb") as csv_file:
+        table = partition(file=csv_file)[0]
+    assert clean_extra_whitespace(table.text) == EXPECTED_XLSX_TEXT
+    assert isinstance(table, Table)
+    assert table.metadata.text_as_html == EXPECTED_XLSX_TABLE
+    assert table.metadata.filetype == "text/csv"
diff --git a/test_unstructured/partition/test_csv.py b/test_unstructured/partition/test_csv.py
new file mode 100644
index 000000000..db5e1a418
--- /dev/null
+++ b/test_unstructured/partition/test_csv.py
@@ -0,0 +1,60 @@
+from unstructured.cleaners.core import clean_extra_whitespace
+from unstructured.documents.elements import Table
+from unstructured.partition.csv import partition_csv
+
+EXPECTED_TABLE = """
Team | +Location | +Stanley Cups | +
Blues | +STL | +1 | +
Flyers | +PHI | +2 | +
Maple Leafs | +TOR | +13 | +