Drop libxml2 dependency

It seems that Python's internal XML parser is good enough to do the job.
This commit is contained in:
James R. Barlow 2015-08-17 15:26:07 -07:00
parent 53c88093ad
commit 2dff3e07ce
4 changed files with 7 additions and 5 deletions

View File

@ -96,7 +96,6 @@ Install dependencies::
sudo apt-get install \
zlib1g-dev \
libjpeg-dev \
-libxml2 \
tesseract-ocr \
qpdf \
unpaper \

View File

@ -47,6 +47,7 @@ Changes
- MuPDF_ tools
- shell scripts
- Java and JHOVE_
- libxml2
- Some new external dependencies are required or optional, compared to v2.x:
@ -66,6 +67,10 @@ Changes
Release candidates
------------------
- rc6:
- dropped libxml2 (Python lxml) since Python 3's internal XML parser is sufficient
- rc5:
- dropped Java and JHOVE in favour of qpdf

View File

@ -9,7 +9,7 @@
##############################################################################
from reportlab.pdfgen.canvas import Canvas
from reportlab.lib.units import inch
-from lxml import etree as ElementTree
+from xml.etree import ElementTree
from PIL import Image
from collections import namedtuple
import re
@ -35,8 +35,7 @@ class HocrTransform():
self.dpi = dpi
self.boxPattern = re.compile(r'bbox((\s+\d+){4})')
-self.hocr = ElementTree.ElementTree()
-self.hocr.parse(hocrFileName)
+self.hocr = ElementTree.parse(hocrFileName)
# if the hOCR file has a namespace, ElementTree requires its use to
# find elements

View File

@ -203,7 +203,6 @@ setup(
install_requires=[
'ruffus>=2.6.3',
'Pillow>=2.4.0',
-'lxml>=3.3.3',
'reportlab>=3.1.44',
'PyPDF2>=1.25.1'
],