From 25e2958fea17781128f8c96349d4077e4ce05b41 Mon Sep 17 00:00:00 2001 From: Lucas Cimon <925560+Lucas-C@users.noreply.github.com> Date: Thu, 14 Jan 2021 23:06:47 +0100 Subject: [PATCH 1/2] Introducing new optional parameter `at_index` to PdfWriter.addpage --- pdfrw/pdfwriter.py | 36 ++++++++++++++++---------- tests/test_add_page.py | 59 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 14 deletions(-) create mode 100755 tests/test_add_page.py diff --git a/pdfrw/pdfwriter.py b/pdfrw/pdfwriter.py index 3c887ba..060edb0 100755 --- a/pdfrw/pdfwriter.py +++ b/pdfrw/pdfwriter.py @@ -254,6 +254,8 @@ def __init__(self, fname=None, version='1.3', compress=False, **kwargs): self.fname = fname self.version = version self.compress = compress + self.pagearray = PdfArray() + self.killobj = {} if kwargs: for name, value in iteritems(kwargs): @@ -262,29 +264,34 @@ def __init__(self, fname=None, version='1.3', compress=False, **kwargs): "on PdfWriter instance" % name) setattr(self, name, value) - self.pagearray = PdfArray() - self.killobj = {} - - def addpage(self, page): - self._trailer = None + def addpage(self, page, at_index=None): + """ + If `at_index` is None (default), the page will be appended at the end. + Else, it is an integer representing the new index of the inserted page. + """ if page.Type != PdfName.Page: raise PdfOutputError('Bad /Type: Expected %s, found %s' % (PdfName.Page, page.Type)) inheritable = page.inheritable # searches for resources - self.pagearray.append( - IndirectPdfDict( - page, - Resources=inheritable.Resources, - MediaBox=inheritable.MediaBox, - CropBox=inheritable.CropBox, - Rotate=inheritable.Rotate, - ) + new_page = IndirectPdfDict( + page, + Resources=inheritable.Resources, + MediaBox=inheritable.MediaBox, + CropBox=inheritable.CropBox, + Rotate=inheritable.Rotate, ) + if at_index is None: + self.pagearray.append(new_page) + else: + self.pagearray.insert(at_index, new_page) + if self._trailer: + count = int(self._trailer.Root.Pages.Count) + self._trailer.Root.Pages.Count = PdfObject(count + 1) # Add parents in the hierarchy to objects we # don't want to output killobj = self.killobj - obj, new_obj = page, self.pagearray[-1] + obj, new_obj = page, new_page while obj is not None: objid = id(obj) if objid in killobj: @@ -331,6 +338,7 @@ def _get_trailer(self): def _set_trailer(self, trailer): self._trailer = trailer + self.pagearray = self._trailer.Root.Pages.Kids trailer = property(_get_trailer, _set_trailer) diff --git a/tests/test_add_page.py b/tests/test_add_page.py new file mode 100755 index 0000000..ad02eaa --- /dev/null +++ b/tests/test_add_page.py @@ -0,0 +1,59 @@ +#! /usr/bin/env python +import hashlib, os + +from pdfrw import PdfReader, PdfWriter +from pdfrw.objects import PdfName, PdfDict, IndirectPdfDict + +try: + import unittest2 as unittest +except ImportError: + import unittest + +import static_pdfs + + +class TestAddPage(unittest.TestCase): + + def test_append_page(self): + in_filepath = static_pdfs.pdffiles[0][5] # global/0ae80b493bc21e6de99f2ff6bbb8bc2c.pdf + out_filepath = "test_append_page.pdf" + for new_page_index, expected_hash in ( + (0, "d76f5573918cba2070da93a10c19d062"), + (1, "337677eae9528441f6c8aafef513c461"), + (-1, "cf306bd87a3a094f506427a5c130c479"), + (None, "cdd97f6794d78d7ef8be0ddf3101ff9d"), + ): + writer = PdfWriter(trailer=PdfReader(in_filepath)) + writer.addpage(new_page(), at_index=new_page_index) + writer.write(out_filepath) + self.assertEqual(file_hash(out_filepath), expected_hash) + os.remove(out_filepath) + +def new_page(): + contents = IndirectPdfDict() + contents.stream = """2 J +0.57 w +BT /F1 36.00 Tf ET +BT 141.73 700.16 Td (Hello!) Tj ET""" + return PdfDict( + Type=PdfName.Page, + Resources=IndirectPdfDict( + Font=PdfDict( + F1=IndirectPdfDict( + BaseFont=PdfName.Helvetica, + Encoding=PdfName.WinAnsiEncoding, + Subtype=PdfName.Type1, + Type=PdfName.Font, + ), + ), + ), + Contents=contents, + ) + +def file_hash(file_path): + with open(file_path, 'rb') as data: + return hashlib.md5(data.read()).hexdigest() + + +if __name__ == '__main__': + unittest.main() From f347bd9c085a969fce6ed65f2fa61d9f1da0b398 Mon Sep 17 00:00:00 2001 From: Lucas Cimon <925560+Lucas-C@users.noreply.github.com> Date: Thu, 1 Apr 2021 15:38:29 +0200 Subject: [PATCH 2/2] Pleasing pep8speaks --- tests/test_add_page.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_add_page.py b/tests/test_add_page.py index ad02eaa..cc29c97 100755 --- a/tests/test_add_page.py +++ b/tests/test_add_page.py @@ -1,5 +1,6 @@ #! /usr/bin/env python -import hashlib, os +import hashlib +import os from pdfrw import PdfReader, PdfWriter from pdfrw.objects import PdfName, PdfDict, IndirectPdfDict @@ -15,7 +16,8 @@ class TestAddPage(unittest.TestCase): def test_append_page(self): - in_filepath = static_pdfs.pdffiles[0][5] # global/0ae80b493bc21e6de99f2ff6bbb8bc2c.pdf + # global/0ae80b493bc21e6de99f2ff6bbb8bc2c.pdf + in_filepath = static_pdfs.pdffiles[0][5] out_filepath = "test_append_page.pdf" for new_page_index, expected_hash in ( (0, "d76f5573918cba2070da93a10c19d062"), @@ -29,6 +31,7 @@ def test_append_page(self): self.assertEqual(file_hash(out_filepath), expected_hash) os.remove(out_filepath) + def new_page(): contents = IndirectPdfDict() contents.stream = """2 J @@ -50,6 +53,7 @@ def new_page(): Contents=contents, ) + def file_hash(file_path): with open(file_path, 'rb') as data: return hashlib.md5(data.read()).hexdigest()