mirror of https://github.com/papermerge/mglib
removed pdftk dependency
parent
9e24776ba8
commit
fa90e6b0a6
|
@ -23,10 +23,6 @@ BINARY_IDENTIFY = "/usr/bin/identify"
|
||||||
# Used to extract text from images/PDF files.
|
# Used to extract text from images/PDF files.
|
||||||
BINARY_OCR = "/usr/bin/tesseract"
|
BINARY_OCR = "/usr/bin/tesseract"
|
||||||
|
|
||||||
# Provided by pdftk package
|
|
||||||
# Used to reorder, cut/paste, delete pages within a PDF document
|
|
||||||
BINARY_PDFTK = "/usr/bin/pdftk"
|
|
||||||
|
|
||||||
# Provided by stapler
|
# Provided by stapler
|
||||||
# Used to edit PDF documents
|
# Used to edit PDF documents
|
||||||
BINARY_STAPLER = "~/.local/bin/stapler"
|
BINARY_STAPLER = "~/.local/bin/stapler"
|
||||||
|
|
357
mglib/pdftk.py
357
mglib/pdftk.py
|
@ -1,357 +0,0 @@
|
||||||
import logging
|
|
||||||
|
|
||||||
from mglib.runcmd import run
|
|
||||||
from mglib.pdfinfo import get_pagecount
|
|
||||||
|
|
||||||
from .conf import settings
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
#
|
|
||||||
# Utilities around pdftk command line tool
|
|
||||||
#
|
|
||||||
# https://www.pdflabs.com/docs/pdftk-man-page/
|
|
||||||
#
|
|
||||||
|
|
||||||
|
|
||||||
def cat_ranges_for_reorder(page_count, new_order):
    """
    Return the list of original page numbers in their new order.

    ``new_order`` is a list of dicts, each with a ``'page_num'`` (the
    page's original number) and a ``'page_order'`` (the position that page
    should now occupy). Both values are converted with ``int()``.

    Examples:

        If in a document with 4 pages the first and second pages were
        swapped, the returned list will be:

            [2, 1, 3, 4]

        If the first page was swapped with the last one (also a 4 pages
        document) the result will be:

            [4, 2, 3, 1]

    Raises ValueError when ``new_order`` does not contain exactly
    ``page_count`` entries, and KeyError when some position in
    1..page_count has no entry in ``new_order``.
    """
    if len(new_order) != page_count:
        # The count can be wrong in either direction, so report both numbers
        # instead of claiming that pages are missing.
        raise ValueError(
            f"Expected {page_count} page entries, got {len(new_order)}"
        )

    # key = page_order (new position), value = page_num (original number)
    page_map = {
        int(item['page_order']): int(item['page_num'])
        for item in new_order
    }

    # A missing position raises KeyError here, which callers rely on.
    return [page_map[position] for position in range(1, page_count + 1)]
|
|
||||||
|
|
||||||
|
|
||||||
def cat_ranges_for_delete(page_count, page_numbers):
    """
    Return the list of page numbers which will 'stay' in the document,
    i.e. every page in 1..page_count that is not listed in page_numbers.

    Examples:

        If the document has 22 pages (page_count=22) and page number 21 is
        to be deleted (i.e. page_numbers = [21]) the result is

            [1, 2, 3, 4, ..., 19, 20, 22]

        If page number 1 is to be deleted:

            [2, 3, 4, ..., 22]

        If page number 22 is to be deleted:

            [1, 2, 3, ..., 21]

        With page_numbers=[1, 7, 10] and page_count=22 the result is:

            [2, 3, 4, 5, 6, 8, 9, 11, 12, 13, ..., 22]

    page_numbers is an iterable of page numbers (starting with 1).

    Raises ValueError if any entry of page_numbers is not an int.
    """
    # Validate and collect in a single pass: a one-shot iterator is then
    # handled correctly, and membership tests below are O(1).
    to_delete = set()
    for check in page_numbers:
        if not isinstance(check, int):
            err_msg = "page_numbers must be a list of ints"
            raise ValueError(err_msg)
        to_delete.add(check)

    return [
        number
        for number in range(1, page_count + 1)
        if number not in to_delete
    ]
|
|
||||||
|
|
||||||
|
|
||||||
def split_ranges(total, after=False, before=False):
    """
    Split the page numbers 1, 2, ..., total of a document into two lists
    around an insertion point.

    Examples:

        Input: total = 9, after=1, before=False
        Output: list1 = [1]; list2 = [2, 3, 4, ..., 9]

        Input: total = 9; after=False, before=1
        Output: list1 = [], list2 = [1, 2, 3, 4, ..., 9]

        Input: total = 5; after=4; before=False
        Output: list1 = [1, 2, 3, 4] list2 = [5]

        Input: total = 5; after=False; before=False
        Output: list1 = [1, 2, 3, 4, 5], list2 = []
        (it means, by default, all pages are inserted at the end of the doc)

    A falsy or non-positive ``after``/``before`` counts as "not specified".
    When both are specified, ``before`` takes priority.

    Returns a ``(list1, list2)`` tuple.

    Raises ValueError when a specified ``after``/``before`` is not an int
    (booleans are rejected as well).
    """
    def _insertion_point(value, name):
        # False, None and 0 all mean "argument not given".
        if not value:
            return None
        # bool is a subclass of int, but True is not a page number.
        if isinstance(value, bool) or not isinstance(value, int):
            raise ValueError(
                f"argument '{name}' is supposed to be an int"
            )
        # Negative numbers are treated as "not given" too, so they never
        # produce ranges containing page 0 or -1.
        return value if value > 0 else None

    before_page = _insertion_point(before, 'before')
    if before_page is not None:
        list1 = list(range(1, before_page))
        list2 = list(range(before_page, total + 1))
        return list1, list2

    after_page = _insertion_point(after, 'after')
    if after_page is not None:
        list1 = list(range(1, after_page + 1))
        list2 = list(range(after_page + 1, total + 1))
        return list1, list2

    # No insertion point: every existing page goes first.
    return list(range(1, total + 1)), []
|
|
||||||
|
|
||||||
|
|
||||||
def paste_pages_into_existing_doc(
    src,
    dst,
    data_list,
    after_page_number=False,
    before_page_number=False
):
    """
    Build and run a pdftk ``cat`` command that inserts pages from other
    documents into the existing document ``src``, writing the result
    to ``dst``.

    ``data_list`` is a list of dicts, each with a ``'src'`` entry (path of
    a source document) and a ``'page_nums'`` entry (iterable of page
    numbers to take from it).

    ``after_page_number`` and ``before_page_number`` are handed to
    ``split_ranges`` together with the page count of ``src``; the two
    lists it returns are the existing pages placed before and after the
    inserted ones.
    """
    head_pages, tail_pages = split_ranges(
        total=get_pagecount(src),
        after=after_page_number,
        before=before_page_number
    )

    # Handle "A" is reserved for the existing document, so the pasted-in
    # documents start at "B".
    doc_letters = "BCDEFGHIJKLMNOPQRSTUVWXYZ"
    handles = [f"A={src}"]
    inserted = []

    for position, entry in enumerate(data_list):
        letter = doc_letters[position]
        handles.append(f"{letter}={entry['src']}")
        inserted.extend(f"{letter}{page}" for page in entry['page_nums'])

    # existing doc pages (either list may be empty)
    kept_before = [f"A{page}" for page in head_pages]
    kept_after = [f"A{page}" for page in tail_pages]

    # pdftk A=doc1 B=doc2 ... cat <pages> output <dst>
    command = [
        settings.BINARY_PDFTK,
        *handles,
        "cat",
        *kept_before,
        *inserted,
        *kept_after,
        "output",
        dst,
    ]

    run(command)
|
|
||||||
|
|
||||||
def paste_pages(
    src,
    dst,
    data_list,
    dst_doc_is_new=True,
    after_page_number=False,
    before_page_number=False
):
    """
    Paste pages taken from one or more documents into ``dst`` using pdftk.

    ``data_list`` is a list of the following format:
        [
            {
                'src': path_to_document,
                'page_nums': [page_num_1, page_num_2, page_num_3]
            },
            {
                'src': path_to_document,
                'page_nums': [page_num_1, page_num_2, page_num_3]
            },
            ...
        ]
    i.e. the documents the pages (with numbers page_num_1...) are taken
    from.

    dst_doc_is_new = True: the destination document is built only from
    the pages listed in ``data_list``. In this case ``src``,
    ``after_page_number`` and ``before_page_number`` are not used.

    dst_doc_is_new = False: the pages are pasted into the existing
    document ``src``; all arguments are forwarded to
    ``paste_pages_into_existing_doc``.
    """
    if not dst_doc_is_new:
        return paste_pages_into_existing_doc(
            src=src,
            dst=dst,
            data_list=data_list,
            after_page_number=after_page_number,
            before_page_number=before_page_number
        )

    # New document: handles start at "A", there is no existing document.
    doc_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    handles = []
    selected = []

    for position, entry in enumerate(data_list):
        letter = doc_letters[position]
        handles.append(f"{letter}={entry['src']}")
        selected.extend(f"{letter}{page}" for page in entry['page_nums'])

    # pdftk A=doc1 B=doc2 ... cat <pages> output <dst>
    command = [
        settings.BINARY_PDFTK,
        *handles,
        "cat",
        *selected,
        "output",
        dst,
    ]

    run(command)
|
|
||||||
|
|
||||||
|
|
||||||
def reorder_pages(
    src, dst, new_order
):
    """
    Write to ``dst`` a copy of ``src`` with its pages rearranged, by
    running pdftk ``cat``.

    new_order is a list of following format:

        [
            {'page_num': 2, 'page_order': 1},
            {'page_num': 1, 'page_order': 2},
            {'page_num': 3, 'page_order': 3},
            {'page_num': 4, 'page_order': 4},
        ]

    The example above means that in a document of 4 pages the first page
    was swapped with the second one:

        page_num = older page order
        page_order = current page order

    In other words, each dict reads:
        <page_num> now should be <page_order>
    """
    ordered_pages = cat_ranges_for_reorder(
        page_count=get_pagecount(src),
        new_order=new_order
    )

    # pdftk <src> cat <page> <page> ... output <dst>
    command = [settings.BINARY_PDFTK, src, "cat"]
    command.extend(str(page) for page in ordered_pages)
    command.extend(["output", dst])

    run(command)
|
|
||||||
|
|
||||||
|
|
||||||
def delete_pages(src, dst, page_numbers):
    """
    Write to ``dst`` a copy of ``src`` without the pages listed in
    ``page_numbers``, by running pdftk ``cat`` on the remaining pages.

    The pages to keep are computed by ``cat_ranges_for_delete`` from the
    page count of ``src`` and ``page_numbers``.
    """
    remaining_pages = cat_ranges_for_delete(
        get_pagecount(src),
        page_numbers
    )

    # pdftk <src> cat <page> <page> ... output <dst>
    command = [settings.BINARY_PDFTK, src, "cat"]
    command.extend(str(page) for page in remaining_pages)
    command.extend(["output", dst])

    run(command)
|
|
|
@ -4,7 +4,7 @@ import shutil
|
||||||
from os import listdir
|
from os import listdir
|
||||||
from os.path import isdir, join
|
from os.path import isdir, join
|
||||||
|
|
||||||
from mglib import pdftk
|
from mglib import stapler
|
||||||
from mglib.path import DocumentPath, PagePath
|
from mglib.path import DocumentPath, PagePath
|
||||||
from mglib.step import Steps
|
from mglib.step import Steps
|
||||||
from mglib.utils import get_assigns_after_delete, safe_to_delete
|
from mglib.utils import get_assigns_after_delete, safe_to_delete
|
||||||
|
@ -209,7 +209,7 @@ class Storage:
|
||||||
self.abspath(dst_doc_path)
|
self.abspath(dst_doc_path)
|
||||||
)
|
)
|
||||||
|
|
||||||
pdftk.reorder_pages(
|
stapler.reorder_pages(
|
||||||
src=self.abspath(src_doc_path),
|
src=self.abspath(src_doc_path),
|
||||||
dst=self.abspath(dst_doc_path),
|
dst=self.abspath(dst_doc_path),
|
||||||
new_order=new_order
|
new_order=new_order
|
||||||
|
@ -269,7 +269,7 @@ class Storage:
|
||||||
self.make_sure_path_exists(
|
self.make_sure_path_exists(
|
||||||
self.abspath(dst_doc_path)
|
self.abspath(dst_doc_path)
|
||||||
)
|
)
|
||||||
pdftk.delete_pages(
|
stapler.delete_pages(
|
||||||
self.abspath(src_doc_path),
|
self.abspath(src_doc_path),
|
||||||
self.abspath(dst_doc_path),
|
self.abspath(dst_doc_path),
|
||||||
page_numbers
|
page_numbers
|
||||||
|
@ -332,7 +332,7 @@ class Storage:
|
||||||
self.abspath(next_ver_dp)
|
self.abspath(next_ver_dp)
|
||||||
)
|
)
|
||||||
|
|
||||||
pdftk.paste_pages(
|
stapler.paste_pages(
|
||||||
src=self.abspath(dest_doc_path),
|
src=self.abspath(dest_doc_path),
|
||||||
dst=self.abspath(next_ver_dp),
|
dst=self.abspath(next_ver_dp),
|
||||||
data_list=data_list,
|
data_list=data_list,
|
||||||
|
|
|
@ -1,145 +0,0 @@
|
||||||
import os
|
|
||||||
import unittest
|
|
||||||
from unittest import mock
|
|
||||||
from mglib import pdftk
|
|
||||||
from mglib.conf import settings
|
|
||||||
from mglib.runcmd import run
|
|
||||||
|
|
||||||
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
||||||
|
|
||||||
DATA_DIR = os.path.join(BASE_DIR, "data")
|
|
||||||
|
|
||||||
|
|
||||||
class TestPdfLib(unittest.TestCase):
    """
    Tests for the pdftk helpers in ``mglib.pdftk``.

    Tests that would execute pdftk patch ``mglib.pdftk.run`` and only
    check the command line that would have been run.
    """

    def test_ranges_for_reorder(self):
        actual = pdftk.cat_ranges_for_reorder(4, [
            {"page_order": 1, "page_num": 4},
            {"page_order": 2, "page_num": 3},
            {"page_order": 3, "page_num": 2},
            {"page_order": 4, "page_num": 1}
        ])
        expected = [4, 3, 2, 1]
        self.assertEqual(expected, actual)

        # Wrong number of entries.
        self.assertRaises(ValueError, pdftk.cat_ranges_for_reorder, 2, [])
        # Right number of entries, but positions 1 and 2 are not covered.
        self.assertRaises(KeyError, pdftk.cat_ranges_for_reorder, 2, [
            {"page_order": 3, "page_num": 4},
            {"page_order": 5, "page_num": 6}
        ])

    def test_delete_pages(self):
        input_file = os.path.join(DATA_DIR, "berlin.pdf")
        output_file = os.path.join(DATA_DIR, "berlin2.pdf")

        with mock.patch("mglib.pdftk.run") as run_func:
            pdftk.delete_pages(input_file, output_file, [1])
            run_func.assert_called()
            run_func.assert_called_with(
                [
                    settings.BINARY_PDFTK,
                    input_file,
                    "cat",
                    "2",
                    "output",
                    output_file
                ]
            )

    def test_cat_ranges_for_delete(self):
        page_count = 22
        page_numbers = range(1, 23)

        actual = pdftk.cat_ranges_for_delete(page_count, [21])
        expected = list(page_numbers)
        expected.remove(21)
        self.assertEqual(actual, expected)

        actual = pdftk.cat_ranges_for_delete(page_count, [1])
        expected = list(page_numbers)
        expected.remove(1)
        self.assertEqual(actual, expected)

        actual = pdftk.cat_ranges_for_delete(page_count, [1, 7, 10])
        expected = list(page_numbers)
        expected.remove(1)
        expected.remove(7)
        expected.remove(10)
        self.assertEqual(actual, expected)

        self.assertRaises(
            ValueError,
            pdftk.cat_ranges_for_delete,
            page_count,
            ["1"]
        )

    def test_split_ranges(self):
        page_count = 9

        self.assertRaises(
            ValueError,
            pdftk.split_ranges,
            9,
            after="a",
            before=False
        )
        self.assertRaises(
            ValueError,
            pdftk.split_ranges,
            9,
            after=False,
            before=True
        )

        actual1, actual2 = pdftk.split_ranges(page_count, 1, False)
        expected1 = [1]
        expected2 = [2, 3, 4, 5, 6, 7, 8, 9]
        self.assertEqual(actual1, expected1)
        self.assertEqual(actual2, expected2)

        actual1, actual2 = pdftk.split_ranges(page_count, False, 2)
        expected1 = [1]
        expected2 = [2, 3, 4, 5, 6, 7, 8, 9]
        self.assertEqual(actual1, expected1)
        self.assertEqual(actual2, expected2)

        actual1, actual2 = pdftk.split_ranges(page_count)
        expected1 = list(range(1, page_count + 1))
        expected2 = []
        self.assertEqual(actual1, expected1)
        self.assertEqual(actual2, expected2)

    def test_reorder_pages(self):
        input_file = os.path.join(DATA_DIR, "berlin.pdf")
        output_file = os.path.join(DATA_DIR, "berlin2.pdf")
        new_order = [
            {'page_num': 2, 'page_order': 1},
            {'page_num': 1, 'page_order': 2},
        ]

        with mock.patch("mglib.pdftk.run") as run_func:
            pdftk.reorder_pages(input_file, output_file, new_order)
            run_func.assert_called()
            run_func.assert_called_with(
                [
                    settings.BINARY_PDFTK,
                    input_file,
                    "cat",
                    "2",
                    "1",
                    "output",
                    output_file
                ]
            )

    def test_paste_pages_into_existing_doc(self):
        input_file = os.path.join(DATA_DIR, "berlin.pdf")
        output_file = os.path.join(DATA_DIR, "berlin2.pdf")
        datalist = []

        # NOTE(review): the expected "A1", "A2" imply berlin.pdf has two
        # pages; the page count itself is not mocked here.
        with mock.patch("mglib.pdftk.run") as run_func:
            pdftk.paste_pages_into_existing_doc(
                input_file, output_file, datalist
            )
            run_func.assert_called()
            run_func.assert_called_with(
                [
                    settings.BINARY_PDFTK,
                    "A=" + input_file,
                    "cat",
                    "A1",
                    "A2",
                    "output",
                    output_file
                ]
            )

        # "34" is iterated character by character, i.e. pages "3" and "4".
        datalist = [{"src": input_file, "page_nums": "34"}]

        with mock.patch("mglib.pdftk.run") as run_func:
            pdftk.paste_pages_into_existing_doc(
                input_file,
                output_file,
                datalist,
                1
            )
            run_func.assert_called()
            run_func.assert_called_with(
                [
                    settings.BINARY_PDFTK,
                    "A=" + input_file,
                    "B=" + input_file,
                    "cat",
                    "A1",
                    "B3",
                    "B4",
                    "A2",
                    "output",
                    output_file
                ]
            )

    def test_paste_pages(self):
        input_file = os.path.join(DATA_DIR, "berlin.pdf")
        output_file = os.path.join(DATA_DIR, "berlin2.pdf")
        datalist = []

        # dst_doc_is_new=False: pages are pasted into the existing document.
        with mock.patch("mglib.pdftk.run") as run_func:
            pdftk.paste_pages(input_file, output_file, datalist, False)
            run_func.assert_called()
            run_func.assert_called_with(
                [
                    settings.BINARY_PDFTK,
                    "A=" + input_file,
                    "cat",
                    "A1",
                    "A2",
                    "output",
                    output_file
                ]
            )

        datalist = [{"src": input_file, "page_nums": "34"}]

        # Default dst_doc_is_new=True: only the listed pages are used.
        with mock.patch("mglib.pdftk.run") as run_func:
            pdftk.paste_pages(input_file, output_file, datalist)
            run_func.assert_called()
            run_func.assert_called_with(
                [
                    settings.BINARY_PDFTK,
                    "A=" + input_file,
                    "cat",
                    "A3",
                    "A4",
                    "output",
                    output_file
                ]
            )
|
|
|
@ -3,7 +3,6 @@ import unittest
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
from mglib import stapler
|
from mglib import stapler
|
||||||
from mglib.conf import settings
|
from mglib.conf import settings
|
||||||
from mglib.runcmd import run
|
|
||||||
|
|
||||||
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
@ -17,15 +16,15 @@ class TestPdfLib(unittest.TestCase):
|
||||||
{"page_order": 2, "page_num": 3},
|
{"page_order": 2, "page_num": 3},
|
||||||
{"page_order": 3, "page_num": 2},
|
{"page_order": 3, "page_num": 2},
|
||||||
{"page_order": 4, "page_num": 1}
|
{"page_order": 4, "page_num": 1}
|
||||||
])
|
])
|
||||||
expected = [4,3,2,1]
|
expected = [4, 3, 2, 1]
|
||||||
assert expected == actual
|
assert expected == actual
|
||||||
|
|
||||||
self.assertRaises(ValueError, stapler.cat_ranges_for_reorder, 2, [])
|
self.assertRaises(ValueError, stapler.cat_ranges_for_reorder, 2, [])
|
||||||
self.assertRaises(KeyError, stapler.cat_ranges_for_reorder, 2, [
|
self.assertRaises(KeyError, stapler.cat_ranges_for_reorder, 2, [
|
||||||
{"page_order": 3, "page_num": 4},
|
{"page_order": 3, "page_num": 4},
|
||||||
{"page_order": 5, "page_num": 6}
|
{"page_order": 5, "page_num": 6}
|
||||||
])
|
])
|
||||||
|
|
||||||
def test_delete_pages(self):
|
def test_delete_pages(self):
|
||||||
input_file = os.path.join(DATA_DIR, "berlin.pdf")
|
input_file = os.path.join(DATA_DIR, "berlin.pdf")
|
||||||
|
@ -38,13 +37,22 @@ class TestPdfLib(unittest.TestCase):
|
||||||
[settings.BINARY_STAPLER, "del", input_file, "1", output_file]
|
[settings.BINARY_STAPLER, "del", input_file, "1", output_file]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_split_ranges(self):
|
def test_split_ranges(self):
|
||||||
page_count = 9
|
page_count = 9
|
||||||
page_numbers = list(range(1, 10))
|
|
||||||
|
|
||||||
self.assertRaises(ValueError, stapler.split_ranges, 9, after="a", before=False)
|
self.assertRaises(
|
||||||
self.assertRaises(ValueError, stapler.split_ranges, 9, after=False, before=True)
|
ValueError,
|
||||||
|
stapler.split_ranges,
|
||||||
|
9,
|
||||||
|
after="a",
|
||||||
|
before=False
|
||||||
|
)
|
||||||
|
self.assertRaises(
|
||||||
|
ValueError,
|
||||||
|
stapler.split_ranges,
|
||||||
|
9, after=False,
|
||||||
|
before=True
|
||||||
|
)
|
||||||
|
|
||||||
actual1, actual2 = stapler.split_ranges(page_count, 1, False)
|
actual1, actual2 = stapler.split_ranges(page_count, 1, False)
|
||||||
expected1 = [1]
|
expected1 = [1]
|
||||||
|
@ -67,54 +75,81 @@ class TestPdfLib(unittest.TestCase):
|
||||||
def test_reorder_pages(self):
|
def test_reorder_pages(self):
|
||||||
input_file = os.path.join(DATA_DIR, "berlin.pdf")
|
input_file = os.path.join(DATA_DIR, "berlin.pdf")
|
||||||
output_file = os.path.join(DATA_DIR, "berlin2.pdf")
|
output_file = os.path.join(DATA_DIR, "berlin2.pdf")
|
||||||
new_order = [
|
new_order = [
|
||||||
{'page_num': 2, 'page_order': 1},
|
{'page_num': 2, 'page_order': 1},
|
||||||
{'page_num': 1, 'page_order': 2},
|
{'page_num': 1, 'page_order': 2},
|
||||||
]
|
]
|
||||||
|
|
||||||
with mock.patch("mglib.stapler.run") as run_func:
|
with mock.patch("mglib.stapler.run") as run_func:
|
||||||
stapler.reorder_pages(input_file, output_file, new_order)
|
stapler.reorder_pages(input_file, output_file, new_order)
|
||||||
run_func.assert_called()
|
run_func.assert_called()
|
||||||
run_func.assert_called_with(
|
run_func.assert_called_with(
|
||||||
[settings.BINARY_STAPLER, "sel", input_file, "2", "1", output_file]
|
[
|
||||||
|
settings.BINARY_STAPLER,
|
||||||
|
"sel",
|
||||||
|
input_file,
|
||||||
|
"2",
|
||||||
|
"1",
|
||||||
|
output_file
|
||||||
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_paste_pages_into_existing_doc(self):
|
def test_paste_pages_into_existing_doc(self):
|
||||||
input_file = os.path.join(DATA_DIR, "berlin.pdf")
|
input_file = os.path.join(DATA_DIR, "berlin.pdf")
|
||||||
output_file = os.path.join(DATA_DIR, "berlin2.pdf")
|
output_file = os.path.join(DATA_DIR, "berlin2.pdf")
|
||||||
datalist = []
|
datalist = []
|
||||||
|
|
||||||
with mock.patch("mglib.stapler.run") as run_func:
|
with mock.patch("mglib.stapler.run") as run_func:
|
||||||
stapler.paste_pages_into_existing_doc(input_file, output_file, datalist)
|
stapler.paste_pages_into_existing_doc(
|
||||||
|
input_file, output_file, datalist
|
||||||
|
)
|
||||||
run_func.assert_called()
|
run_func.assert_called()
|
||||||
run_func.assert_called_with(
|
run_func.assert_called_with(
|
||||||
[settings.BINARY_STAPLER, "sel", "A=" + input_file, "A1", "A2", output_file]
|
[
|
||||||
|
settings.BINARY_STAPLER,
|
||||||
|
"sel", "A=" + input_file, "A1", "A2", output_file
|
||||||
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
datalist = [{"src": input_file, "page_nums": "34"}]
|
datalist = [{"src": input_file, "page_nums": "34"}]
|
||||||
|
|
||||||
with mock.patch("mglib.stapler.run") as run_func:
|
with mock.patch("mglib.stapler.run") as run_func:
|
||||||
stapler.paste_pages_into_existing_doc(input_file, output_file, datalist, 1)
|
stapler.paste_pages_into_existing_doc(
|
||||||
|
input_file,
|
||||||
|
output_file,
|
||||||
|
datalist,
|
||||||
|
1
|
||||||
|
)
|
||||||
run_func.assert_called()
|
run_func.assert_called()
|
||||||
run_func.assert_called_with(
|
run_func.assert_called_with(
|
||||||
[settings.BINARY_STAPLER, "sel", "A=" + input_file, "B=" + input_file, "A1", "B3",
|
[
|
||||||
"B4", "A2", output_file]
|
settings.BINARY_STAPLER,
|
||||||
|
"sel", "A=" + input_file,
|
||||||
|
"B=" + input_file, "A1", "B3",
|
||||||
|
"B4", "A2", output_file
|
||||||
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_paste_pages(self):
|
def test_paste_pages(self):
|
||||||
input_file = os.path.join(DATA_DIR, "berlin.pdf")
|
input_file = os.path.join(DATA_DIR, "berlin.pdf")
|
||||||
output_file = os.path.join(DATA_DIR, "berlin2.pdf")
|
output_file = os.path.join(DATA_DIR, "berlin2.pdf")
|
||||||
datalist = []
|
datalist = []
|
||||||
|
|
||||||
with mock.patch("mglib.stapler.run") as run_func:
|
with mock.patch("mglib.stapler.run") as run_func:
|
||||||
stapler.paste_pages(input_file, output_file, datalist, False)
|
stapler.paste_pages(input_file, output_file, datalist, False)
|
||||||
run_func.assert_called()
|
run_func.assert_called()
|
||||||
run_func.assert_called_with(
|
run_func.assert_called_with(
|
||||||
[settings.BINARY_STAPLER, "sel", "A=" + input_file, "A1", "A2", output_file]
|
[
|
||||||
|
settings.BINARY_STAPLER,
|
||||||
|
"sel",
|
||||||
|
"A=" + input_file,
|
||||||
|
"A1",
|
||||||
|
"A2",
|
||||||
|
output_file
|
||||||
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
datalist = [{"src": input_file, "page_nums": "34"}]
|
datalist = [{"src": input_file, "page_nums": "34"}]
|
||||||
|
|
||||||
with mock.patch("mglib.stapler.run") as run_func:
|
with mock.patch("mglib.stapler.run") as run_func:
|
||||||
stapler.paste_pages(input_file, output_file, datalist)
|
stapler.paste_pages(input_file, output_file, datalist)
|
||||||
|
|
Loading…
Reference in New Issue