mglib/mglib/stapler.py

301 lines
7.1 KiB
Python

import logging
from mglib.runcmd import run
from mglib.pdfinfo import get_pagecount
from .conf import settings
logger = logging.getLogger(__name__)
#
# Utilities around stapler command line tool
#
# https://github.com/hellerbarde/stapler
#
def cat_ranges_for_reorder(page_count, new_order):
    """
    Translate a reorder request into the page sequence of the new document.

    Returns a list of integers. The element at index ``i`` is the number
    of the page (in the current document) that must end up at position
    ``i + 1`` of the reordered document.

    Examples:

        If in a document with 4 pages the first and second pages were
        swapped, the returned list will be:

            [2, 1, 3, 4]

        If the first page was swapped with the last one (also a 4 pages
        document) the returned list will be:

            [4, 2, 3, 1]

    Arguments:
        page_count: number of pages of the document.
        new_order: list of dictionaries, each one with 'page_num' and
            'page_order' keys. Both values are passed through int().

    Raises:
        ValueError: if ``new_order`` does not contain exactly
            ``page_count`` entries.
        KeyError: if an entry lacks 'page_num' or 'page_order', or if
            one of the positions 1..page_count is never used as a
            'page_order' value.
    """
    if len(new_order) != page_count:
        # The mismatch can go either way (too few or too many entries),
        # so report both numbers instead of guessing the direction.
        raise ValueError(
            f"Expected {page_count} pages in new_order, "
            f"got {len(new_order)}"
        )

    # key = page_order (position in the reordered document)
    # value = page_num (page number in the current document)
    page_map = {
        int(item['page_order']): int(item['page_num'])
        for item in new_order
    }

    return [page_map[position] for position in range(1, page_count + 1)]
def split_ranges(total, after=False, before=False):
    """
    Given a range 1, 2, ..., total (page numbers of a doc),
    split it in two lists.

    Examples:

        Input: total = 9, after=1, before=False
        Output: list1 = [1]; list2 = [2, 3, 4, ..., 9].

        Input: total = 9; after=False, before=1
        Output: list1 = [], list2 = [1, 2, 3, 4, ..., 9]

        Input: total = 5; after=4; before=False
        Output: list1 = [1, 2, 3, 4] list2 = [5]

        Input: total = 5; after=False; before=False;
        Output: list1 = [1, 2, 3, 4, 5], list2 = []

        (it means, by default, all pages are inserted at the end of the doc)

    Rules applied to pick the split point:

        * a positive 'before' wins over 'after';
        * otherwise a positive 'after' is used;
        * falsy or negative values mean "not specified", i.e. the split
          happens at the end of the document;
        * a split point beyond the last page is clamped to the end, so
          neither list ever contains a page number outside 1..total.

    Raises:
        ValueError: if the argument being considered is truthy but is
            not an int.
    """
    def is_int(value):
        # bool is a subclass of int; True is not a page number.
        return isinstance(value, int) and not isinstance(value, bool)

    # Number of leading pages which go into list1; None = not decided yet.
    head_size = None

    if before:
        if not is_int(before):
            raise ValueError(
                "argument 'before' is supposed to be an int"
            )
        if before > 0:
            head_size = before - 1

    if head_size is None and after:
        if not is_int(after):
            raise ValueError(
                "argument 'after' is supposed to be an int"
            )
        if after > 0:
            head_size = after

    if head_size is None or head_size > total:
        # Nothing usable was specified (or it points past the last page):
        # keep every page in list1.
        head_size = total

    list1 = list(range(1, head_size + 1))
    list2 = list(range(head_size + 1, total + 1))

    return list1, list2
def paste_pages_into_existing_doc(
    src,
    dst,
    data_list,
    after_page_number=False,
    before_page_number=False
):
    """
    Build and run a stapler "sel" command which combines the pages of
    an existing document with pages picked from other documents.

    Arguments:
        src: path of the existing document; its page count is read with
            get_pagecount() and it gets the handle "A".
        dst: last argument of the stapler command.
        data_list: list of dictionaries, one per donor document, each
            with a 'src' key (document path) and a 'page_nums' key
            (page numbers to take from that document).
        after_page_number, before_page_number: handed over to
            split_ranges(), which decides which of the existing pages
            are placed before and which after the pasted ones.

    Donor documents get the handles "B", "C", ... in the order in which
    they appear in data_list.
    """
    leading_pages, trailing_pages = split_ranges(
        total=get_pagecount(src),
        after=after_page_number,
        before=before_page_number
    )

    # "A" is deliberately absent: it is reserved for the existing
    # document, whose pages come from leading_pages / trailing_pages.
    donor_handles = "BCDEFGHIJKLMNOPQRSTUVWXYZ"

    handle_definitions = [f"A={src}"]
    pasted_pages = []

    for position, donor in enumerate(data_list):
        handle = donor_handles[position]
        donor_path = donor['src']
        donor_page_nums = donor['page_nums']

        handle_definitions.append(f"{handle}={donor_path}")
        pasted_pages.extend(
            f"{handle}{page_num}" for page_num in donor_page_nums
        )

    command = [
        settings.BINARY_STAPLER,
        "sel",
        # A=doc1_path, B=doc2_path, ...
        *handle_definitions,
        # pages of the existing doc placed first (may be empty)
        *(f"A{page_num}" for page_num in leading_pages),
        # newly inserted pages
        *pasted_pages,
        # pages of the existing doc placed last (may be empty)
        *(f"A{page_num}" for page_num in trailing_pages),
        dst,
    ]

    run(command)
def paste_pages(
    src,
    dst,
    data_list,
    dst_doc_is_new=True,
    after_page_number=False,
    before_page_number=False
):
    """
    Paste pages picked from one or more documents into dst by means of
    a stapler "sel" command.

    data_list is a list of following format:

        [
            {
                'src': path_to_document,
                'page_nums': [page_num_1, page_num_2, page_num_3]
            },
            {
                'src': path_to_document,
                'page_nums': [page_num_1, page_num_2, page_num_3]
            },
            ...
        ]

    i.e. the documents the pages (with numbers page_num_1...) are
    taken from.

    dst_doc_is_new = True: only the pages listed in data_list are
    selected; the arguments src, after_page_number and
    before_page_number are not used.

    dst_doc_is_new = False: all arguments are forwarded to
    paste_pages_into_existing_doc(), which mixes the listed pages with
    the pages of the existing document src. The position of the pasted
    pages is derived there from after_page_number / before_page_number.
    """
    if not dst_doc_is_new:
        return paste_pages_into_existing_doc(
            src=src,
            dst=dst,
            data_list=data_list,
            after_page_number=after_page_number,
            before_page_number=before_page_number
        )

    handles = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    handle_definitions = []
    selected_pages = []

    for position, donor in enumerate(data_list):
        handle = handles[position]
        donor_path = donor['src']
        donor_page_nums = donor['page_nums']

        handle_definitions.append(f"{handle}={donor_path}")
        selected_pages.extend(
            f"{handle}{page_num}" for page_num in donor_page_nums
        )

    command = [
        settings.BINARY_STAPLER,
        "sel",
        # A=doc1_path, B=doc2_path, ...
        *handle_definitions,
        *selected_pages,
        dst,
    ]

    run(command)
def reorder_pages(
    src, dst, new_order
):
    """
    Run a stapler "sel" command which selects the pages of src in the
    sequence described by new_order; dst is the last command argument.

    new_order is a list of following format:

        [
            {'page_num': 2, 'page_order': 1},
            {'page_num': 1, 'page_order': 2},
            {'page_num': 3, 'page_order': 3},
            {'page_num': 4, 'page_order': 4},
        ]

    The example above says that, in a document of 4 pages, the first
    page was swapped with the second one.

        page_num = older page order
        page_order = current page order

    In other words, each dictionary is read as:

        <page_num> now should be <page_order>
    """
    page_sequence = cat_ranges_for_reorder(
        page_count=get_pagecount(src),
        new_order=new_order
    )

    command = [settings.BINARY_STAPLER, "sel", src]
    command.extend(str(page_num) for page_num in page_sequence)
    command.append(dst)

    run(command)
def delete_pages(src, dst, page_numbers):
    """
    Run the stapler "del" command.

    The command line consists of the stapler binary, "del", src, every
    item of page_numbers converted to a string and, last, dst.
    """
    command = [
        settings.BINARY_STAPLER,
        "del",
        src,
        *map(str, page_numbers),
        dst,
    ]

    run(command)