summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean Whitton <spwhitton@spwhitton.name>2019-08-16 09:58:52 +0100
committerSean Whitton <spwhitton@spwhitton.name>2019-08-16 09:58:52 +0100
commit189a50afe3fb137255267becf388b5b2d989e147 (patch)
tree16bb6f2b84b55985515cb743b76d7d809ea4b1f4
parent2bbc4a5ff808ed8fdd1c9b7507cab6ba743a2707 (diff)
Commit Debian 3.0 (quilt) metadata
[dgit (9.6~bpo10+1) quilt-fixup]
-rw-r--r--debian/patches/debian-changes138
1 files changed, 53 insertions, 85 deletions
diff --git a/debian/patches/debian-changes b/debian/patches/debian-changes
index b66c7d86..6a7c05fe 100644
--- a/debian/patches/debian-changes
+++ b/debian/patches/debian-changes
@@ -9,9 +9,9 @@ patches. To obtain a patch queue for package version 1.2.3-1:
% git log --oneline 1.2.3..debian/1.2.3-1 -- . ':!debian'
See dgit(1), dgit(7) and dgit-maint-merge(7) for more information.
---- ocrmypdf-8.0.1+dfsg.orig/docs/index.rst
-+++ ocrmypdf-8.0.1+dfsg/docs/index.rst
-@@ -16,7 +16,6 @@ PDF is the best format for storing and e
+--- ocrmypdf-9.0.1+dfsg.orig/docs/index.rst
++++ ocrmypdf-9.0.1+dfsg/docs/index.rst
+@@ -11,7 +11,6 @@ PDF is the best format for storing and e
introduction
release_notes
@@ -19,27 +19,25 @@ See dgit(1), dgit(7) and dgit-maint-merge(7) for more information.
languages
jbig2
---- ocrmypdf-8.0.1+dfsg.orig/docs/languages.rst
-+++ ocrmypdf-8.0.1+dfsg/docs/languages.rst
-@@ -7,10 +7,7 @@ OCRmyPDF uses Tesseract for OCR, and rel
+--- ocrmypdf-9.0.1+dfsg.orig/docs/languages.rst
++++ ocrmypdf-9.0.1+dfsg/docs/languages.rst
+@@ -13,9 +13,6 @@ languages <https://github.com/tesseract-
+ For Linux users, you can often find packages that provide language
+ packs:
- Tesseract supports `most languages <https://github.com/tesseract-ocr/tesseract/blob/master/doc/tesseract.1.asc#languages>`_.
-
--For Linux users, you can often find packages that provide language packs:
--
-Debian and Ubuntu users
-------------------------
-+You can often find packages that provide language packs:
-
+-=======================
+-
.. code-block:: bash
-@@ -22,55 +19,3 @@ Debian and Ubuntu users
-
- You can then pass the ``-l LANG`` argument to OCRmyPDF to give a hint as to what languages it should search for. Multiple
- languages can be requested using either ``-l eng+fre`` (English and French) or ``-l eng -l fre``.
+ # Display a list of all Tesseract language packs
+@@ -28,32 +25,3 @@ You can then pass the ``-l LANG`` argume
+ to what languages it should search for. Multiple languages can be
+ requested using either ``-l eng+fre`` (English and French) or
+ ``-l eng -l fre``.
-
-Fedora users
--------------
+-============
-
-.. code-block:: bash
-
@@ -49,59 +47,36 @@ See dgit(1), dgit(7) and dgit-maint-merge(7) for more information.
- # Install Chinese Simplified language pack
- dnf install tesseract-langpack-chi_sim
-
--You can then pass the ``-l LANG`` argument to OCRmyPDF to give a hint as to
--what languages it should search for. Multiple languages can be requested using
--either ``-l eng+fre`` (English and French) or ``-l eng -l fre``.
+-You can then pass the ``-l LANG`` argument to OCRmyPDF to give a hint as
+-to what languages it should search for. Multiple languages can be
+-requested using either ``-l eng+fre`` (English and French) or
+-``-l eng -l fre``.
-
-macOS users
-------------
+-===========
-
--You can install additional language packs by :ref:`installing Tesseract using Homebrew with all language packs <macos-all-languages>`.
+-You can install additional language packs by
+-:ref:`installing Tesseract using Homebrew with all language packs <macos-all-languages>`.
-
-Docker users
--------------
--
--Users of the Docker image may use the alternative :ref:`"polyglot" container <docker-polyglot>` which includes all languages.
--
--Adding individual language packs to a Docker image
--""""""""""""""""""""""""""""""""""""""""""""""""""
--
--If you wish to add a single language pack, you could do the following:
--
--* Download the desired ``.trainedata`` file from the `tessdata <https://github.com/tesseract-ocr/tessdata>`_ repository. Let's use Hebrew in this example (``heb.traineddata``)
--
--* Copy the file to ``/home/user/downloads/heb.traineddata``.
--
--* Create a new container based on the ocrmypdf-tess4 image and jump into it with a terminal:
--
--.. code-block:: bash
--
-- host$ docker run -v /home/user/downloads:/home/docker -it --entrypoint /bin/bash ocrmypdf-tess4
--
--* Put the file where Tesseract expects it:
--
--.. code-block:: bash
--
-- docker$ cp /home/docker/heb.traineddata /usr/share/tesseract-ocr/tessdata
--
--* Note the container id, and save it as a new image (in this example, ``ocrmypdf-tess4-heb``)
--
--.. code-block:: bash
--
-- host$ docker commit <container_id> ocrmypdf-tess4-heb
---- ocrmypdf-8.0.1+dfsg.orig/setup.py
-+++ ocrmypdf-8.0.1+dfsg/setup.py
-@@ -240,7 +240,6 @@ setup(
- 'cffi >= 1.9.1', # to build the leptonica module
- 'pytest-runner', # to enable python setup.py test
- 'setuptools_scm', # so that version will work
-- 'setuptools_scm_git_archive' # enable version from github tarballs
+-============
+-
+-Users of the OCRmyPDF Docker image should install language packs into a
+-derived Docker image as
+-:ref:`described in that section <docker-lang-packs>`.
+--- ocrmypdf-9.0.1+dfsg.orig/setup.py
++++ ocrmypdf-9.0.1+dfsg/setup.py
+@@ -87,7 +87,6 @@ setup(
+ 'cffi >= 1.9.1', # to build the leptonica module
+ 'pytest-runner', # to enable python setup.py test
+ 'setuptools_scm', # so that version will work
+- 'setuptools_scm_git_archive', # enable version from github tarballs
],
use_scm_version={'version_scheme': 'post-release'},
- cffi_modules=[
---- ocrmypdf-8.0.1+dfsg.orig/src/ocrmypdf/__main__.py
-+++ ocrmypdf-8.0.1+dfsg/src/ocrmypdf/__main__.py
-@@ -133,17 +133,17 @@ your PDF, use --output-type pdf.
+ cffi_modules=['src/ocrmypdf/lib/compile_leptonica.py:ffibuilder'],
+--- ocrmypdf-9.0.1+dfsg.orig/src/ocrmypdf/cli.py
++++ ocrmypdf-9.0.1+dfsg/src/ocrmypdf/cli.py
+@@ -93,17 +93,17 @@ your PDF, use --output-type pdf.
If OCRmyPDF is given an image file as input, it will attempt to convert the
image to a PDF before processing. For more control over the conversion of
@@ -124,12 +99,12 @@ See dgit(1), dgit(7) and dgit-maint-merge(7) for more information.
""",
)
---- ocrmypdf-8.0.1+dfsg.orig/tests/test_metadata.py
-+++ ocrmypdf-8.0.1+dfsg/tests/test_metadata.py
-@@ -322,43 +322,3 @@ def test_metadata_fixup_warning(resource
- context=context,
- )
- log.warning.assert_called_once()
+--- ocrmypdf-9.0.1+dfsg.orig/tests/test_metadata.py
++++ ocrmypdf-9.0.1+dfsg/tests/test_metadata.py
+@@ -309,36 +309,3 @@ def test_metadata_fixup_warning(resource
+ context = PDFContext(options, outdir, outdir / 'graph_mod.pdf', None)
+ metadata_fixup(working_file=outdir / 'graph.pdf', context=context)
+ assert any(record.levelname == 'WARNING' for record in caplog.records)
-
-
-def test_prevent_gs_invalid_xml(resources, outdir):
@@ -139,29 +114,22 @@ See dgit(1), dgit(7) and dgit-maint-merge(7) for more information.
- from ocrmypdf.pdfinfo import PdfInfo
-
- generate_pdfa_ps(outdir / 'pdfa.ps')
-- input_files = [
-- str(outdir / 'layers.rendered.pdf'),
-- str(outdir / 'pdfa.ps'),
-- ]
- copyfile(resources / 'enron1.pdf', outdir / 'layers.rendered.pdf')
-- log = logging.getLogger()
-- context = JobContext()
-
-- options = parser.parse_args(args=[
-- '-j', '1', '--output-type', 'pdfa-2', 'a.pdf', 'b.pdf']
+- options = parser.parse_args(
+- args=['-j', '1', '--output-type', 'pdfa-2', 'a.pdf', 'b.pdf']
- )
-- context.options = options
-- context.pdfinfo = PdfInfo(resources / 'enron1.pdf')
+- pdfinfo = PdfInfo(resources / 'enron1.pdf')
+- context = PDFContext(options, outdir, resources / 'enron1.pdf', pdfinfo)
-
- convert_to_pdfa(
-- input_files_groups=input_files,
-- output_file=outdir / 'pdfa.pdf',
-- log=log,
-- context=context
+- str(outdir / 'layers.rendered.pdf'), str(outdir / 'pdfa.ps'), context
- )
-
- with open(outdir / 'pdfa.pdf', 'rb') as f:
-- with mmap.mmap(f.fileno(), 0, flags=mmap.MAP_PRIVATE, prot=mmap.PROT_READ) as mm:
+- with mmap.mmap(
+- f.fileno(), 0, flags=mmap.MAP_PRIVATE, prot=mmap.PROT_READ
+- ) as mm:
- # Since the XML may be invalid, we scan instead of actually feeding it
- # to a parser.
- XMP_MAGIC = b'W5M0MpCehiHzreSzNTczkc9d'