Drop libxml2 dependency

It seems that Python's standard-library XML parser (xml.etree.ElementTree) is good enough to do the job, so lxml (and with it libxml2) is no longer needed.
This commit is contained in:
James R. Barlow 2015-08-17 15:26:07 -07:00
parent 53c88093ad
commit 2dff3e07ce
4 changed files with 7 additions and 5 deletions

View File

@ -96,7 +96,6 @@ Install dependencies::
sudo apt-get install \ sudo apt-get install \
zlib1g-dev \ zlib1g-dev \
libjpeg-dev \ libjpeg-dev \
libxml2 \
tesseract-ocr \ tesseract-ocr \
qpdf \ qpdf \
unpaper \ unpaper \

View File

@ -47,6 +47,7 @@ Changes
- MuPDF_ tools - MuPDF_ tools
- shell scripts - shell scripts
- Java and JHOVE_ - Java and JHOVE_
- libxml2
- Some new external dependencies are required or optional, compared to v2.x: - Some new external dependencies are required or optional, compared to v2.x:
@ -66,6 +67,10 @@ Changes
Release candidates Release candidates
------------------ ------------------
- rc6:
- dropped libxml2 (Python lxml) since Python 3's internal XML parser is sufficient
- rc5: - rc5:
- dropped Java and JHOVE in favour of qpdf - dropped Java and JHOVE in favour of qpdf

View File

@ -9,7 +9,7 @@
############################################################################## ##############################################################################
from reportlab.pdfgen.canvas import Canvas from reportlab.pdfgen.canvas import Canvas
from reportlab.lib.units import inch from reportlab.lib.units import inch
from lxml import etree as ElementTree from xml.etree import ElementTree
from PIL import Image from PIL import Image
from collections import namedtuple from collections import namedtuple
import re import re
@ -35,8 +35,7 @@ class HocrTransform():
self.dpi = dpi self.dpi = dpi
self.boxPattern = re.compile(r'bbox((\s+\d+){4})') self.boxPattern = re.compile(r'bbox((\s+\d+){4})')
self.hocr = ElementTree.ElementTree() self.hocr = ElementTree.parse(hocrFileName)
self.hocr.parse(hocrFileName)
# if the hOCR file has a namespace, ElementTree requires its use to # if the hOCR file has a namespace, ElementTree requires its use to
# find elements # find elements

View File

@ -203,7 +203,6 @@ setup(
install_requires=[ install_requires=[
'ruffus>=2.6.3', 'ruffus>=2.6.3',
'Pillow>=2.4.0', 'Pillow>=2.4.0',
'lxml>=3.3.3',
'reportlab>=3.1.44', 'reportlab>=3.1.44',
'PyPDF2>=1.25.1' 'PyPDF2>=1.25.1'
], ],