Copy document metadata from source document into output (untested)

This works for ASCII only; a Unicode version will follow.
This commit is contained in:
Jim Barlow 2015-07-25 15:31:02 -07:00
parent 72e5fa9ba0
commit abf2e7e9bb
2 changed files with 23 additions and 6 deletions

View File

@ -596,7 +596,18 @@ def generate_postscript_stub(
input_file,
output_file,
log):
generate_pdfa_def(output_file)
try:
pdf = pypdf.PdfFileReader(input_file)
pdfmark = {
'title': pdf.documentInfo['/Title'],
'author': pdf.documentInfo['/Author'],
'keywords': pdf.documentInfo['/Keywords'],
'subject': pdf.documentInfo['/Subject']
}
except KeyError:
pdfmark = {}
generate_pdfa_def(output_file, pdfmark)
@transform(

View File

@ -25,6 +25,9 @@ pdfa_def_template = u"""%!
def
[ /Title ($pdf_title)
/Author ($pdf_author)
/Subject ($pdf_subject)
/Keywords ($pdf_keywords)
/DOCINFO pdfmark
% Define an ICC profile :
@ -57,11 +60,14 @@ def
"""
def _get_pdfa_def(icc_profile, pdf_title, icc_identifier):
def _get_pdfa_def(icc_profile, icc_identifier, pdfmark):
t = Template(pdfa_def_template)
result = t.substitute(icc_profile=icc_profile,
pdf_title=pdf_title,
icc_identifier=icc_identifier)
icc_identifier=icc_identifier,
pdf_title=pdfmark.get('title', ''),
pdf_author=pdfmark.get('author', ''),
pdf_subject=pdfmark.get('subject', ''),
pdf_keywords=pdfmark.get('keywords', ''))
return result
@ -91,13 +97,13 @@ def _get_postscript_icc_path():
return path
def generate_pdfa_def(target_filename, pdf_title='', icc='sRGB'):
def generate_pdfa_def(target_filename, pdfmark, icc='sRGB'):
if icc == 'sRGB':
icc_profile = os.path.join(_get_postscript_icc_path(), 'srgb.icc')
else:
raise NotImplementedError("Only supporting sRGB")
ps = _get_pdfa_def(icc_profile, pdf_title, icc)
ps = _get_pdfa_def(icc_profile, icc, pdfmark)
# Since PostScript might not handle UTF-8 (it's hard to get a clear
# answer), insist on ascii