#!/usr/bin/env python3 # SPDX-FileCopyrightText: 2019 James R. Barlow # SPDX-License-Identifier: AGPL-3.0-or-later """This is a simple web service/HTTP wrapper for OCRmyPDF This may be more convenient than the command line tool for some Docker users. Note that OCRmyPDF uses Ghostscript, which is licensed under AGPLv3+. While OCRmyPDF is under GPLv3, this file is distributed under the Affero GPLv3+ license, to emphasize that SaaS deployments should make sure they comply with Ghostscript's license as well as OCRmyPDF's. """ from __future__ import annotations import os import shlex from subprocess import PIPE, run from tempfile import TemporaryDirectory from flask import Flask, Response, request, send_from_directory from werkzeug.utils import secure_filename app = Flask(__name__) app.secret_key = "secret" app.config['MAX_CONTENT_LENGTH'] = 50_000_000 app.config.from_envvar("OCRMYPDF_WEBSERVICE_SETTINGS", silent=True) ALLOWED_EXTENSIONS = {"pdf"} def allowed_file(filename): return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS def do_ocrmypdf(file): uploaddir = TemporaryDirectory(prefix="ocrmypdf-upload") downloaddir = TemporaryDirectory(prefix="ocrmypdf-download") filename = secure_filename(file.filename) up_file = os.path.join(uploaddir.name, filename) file.save(up_file) down_file = os.path.join(downloaddir.name, filename) cmd_args = [arg for arg in shlex.split(request.form["params"])] if "--sidecar" in cmd_args: return Response("--sidecar not supported", 501, mimetype='text/plain') ocrmypdf_args = ["ocrmypdf", *cmd_args, up_file, down_file] proc = run(ocrmypdf_args, capture_output=True, encoding="utf-8") if proc.returncode != 0: stderr = proc.stderr return Response(stderr, 400, mimetype='text/plain') return send_from_directory(downloaddir.name, filename) @app.route("/", methods=["GET", "POST"]) def upload_file(): if request.method == "POST": if "file" not in request.files: return Response("No file in POST", 400, mimetype='text/plain') file = request.files["file"] if file.filename == "": return Response("Empty filename", 400, mimetype='text/plain') if not allowed_file(file.filename): return Response("Invalid filename", 400, mimetype='text/plain') if file and allowed_file(file.filename): return do_ocrmypdf(file) return Response("Some other problem", 400, mimetype='text/plain') return """ OCRmyPDF webservice

Upload a PDF (debug UI)

Notice

This is a webservice wrapper for OCRmyPDF.

Copyright 2019 James R. Barlow

This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>.

""" if __name__ == "__main__": app.run(host='0.0.0.0', port=5000)