mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-28 02:55:55 +00:00
feat(ingest/csv-enrich): handle BOM character (#6509)
This has come up a number of times, e.g. in this Slack thread: https://datahubspace.slack.com/archives/C029A3M079U/p1669000226732159?thread_ts=1669000226.732159&cid=C029A3M079U
This commit is contained in:
parent
74cc88f2df
commit
3fdaf13f2c
@ -534,7 +534,12 @@ class CSVEnricherSource(Source):
|
||||
return owners
|
||||
|
||||
def get_workunits(self) -> Iterable[MetadataWorkUnit]:
|
||||
with open(self.config.filename, "r") as f:
|
||||
# As per https://stackoverflow.com/a/49150749/5004662, we want to use
|
||||
# the 'utf-8-sig' encoding to handle any BOM character that may be
|
||||
# present in the file. Excel is known to add a BOM to CSV files.
|
||||
# As per https://stackoverflow.com/a/63508823/5004662,
|
||||
# this is also safe with normal files that don't have a BOM.
|
||||
with open(self.config.filename, mode="r", encoding="utf-8-sig") as f:
|
||||
rows = csv.DictReader(f, delimiter=self.config.delimiter)
|
||||
for row in rows:
|
||||
# We need the resource to move forward
|
||||
|
Loading…
x
Reference in New Issue
Block a user