diff --git a/mglib/ocrmigrate.py b/mglib/ocrmigrate.py index 3feaed3..aa811c8 100644 --- a/mglib/ocrmigrate.py +++ b/mglib/ocrmigrate.py @@ -18,59 +18,6 @@ OcrMigrate class takes care of this sort of txt/hocr files moves. logger = logging.getLogger(__name__) - - - -def get_assigns_after_delete(total_pages, deleted_pages): - """ - given total pages and a list of deleted pages - returns - a list of assignations of pages: - [new_version_page_num, old_version_page_num] - Example 1: - total_pages: 6 - deleted_pages: [1, 2] - returns: [ - [(1, 3), (2, 4), (3, 5), (4, 6)] - # page #1 gets info from prev page #3 - # page #2 ... #4 - ... - # page #4 ... #6 - ] - - Example 2: - total pages: 5 - deleted_pages [1, 5] - returns: [ - [(1, 2), (2, 3), (3, 4) - ] - - Example 3: - total pages: 5 - deleted_pages [2, 3] - returns: [ - [(1, 1), (2, 4), (3, 5) - # page #1 stays unaffected - # page #2 gets the info from page number 4 - # page #3 gets info from page #5 - ] - """ - if total_pages < len(deleted_pages): - err_msg = f"total_pages < deleted_pages" - raise ValueError(err_msg) - - # only numbers of pages which were not deleted - pages = [ - page for page in list(range(1, total_pages + 1)) - if page not in deleted_pages - ] - - page_numbers = range(1, len(pages) + 1) - - return list(zip(page_numbers, pages)) - - - - def migrate_cutted_pages(dest_ep, src_doc_ep_list): """ dest_ep = destination document endpoint @@ -177,8 +124,3 @@ class OcrMigrate: src_page_ep=src_page_ep, dst_page_ep=dst_page_ep ) - - def migrate_reorder(self, new_order): - """ - Similar to migrate_delete, with minor tweaks. - """ diff --git a/mglib/pdftk.py b/mglib/pdftk.py index 05bb313..1b3891a 100644 --- a/mglib/pdftk.py +++ b/mglib/pdftk.py @@ -338,20 +338,17 @@ def reorder_pages( run(cmd) -def delete_pages(doc_ep, page_numbers): - ep_url = doc_ep.url() - page_count = get_pagecount(ep_url) +def delete_pages(src, dst, page_numbers): + page_count = get_pagecount(src) cat_ranges = cat_ranges_for_delete( page_count, page_numbers ) - doc_ep.inc_version() - cmd = [ "pdftk", - ep_url, + src, "cat" ] for page in cat_ranges: @@ -360,9 +357,6 @@ def delete_pages(doc_ep, page_numbers): ) cmd.append("output") - make_sure_path_exists(doc_ep.url()) - cmd.append(doc_ep.url()) + cmd.append(dst) run(cmd) - - return doc_ep.version diff --git a/mglib/storage.py b/mglib/storage.py index d8144b7..8d67633 100644 --- a/mglib/storage.py +++ b/mglib/storage.py @@ -4,7 +4,10 @@ from os.path import isdir, join import logging import shutil from mglib.step import Steps -from mglib.utils import safe_to_delete +from mglib.utils import ( + safe_to_delete, + get_assigns_after_delete +) from mglib import pdftk from mglib.path import PagePath, DocumentPath @@ -125,15 +128,6 @@ class Storage: self.path(_path) ) - def delete_pages(self, doc_path, page_numers): - """ - Delets pages in the document pointed by doc_path. - doc_path is an instance of mglib.path.DocumentPath - - In case of success returns document's new version. - """ - pass - def copy_page(self, src_page_path, dst_page_path): err_msg = "copy_page accepts only PageEp instances" @@ -252,6 +246,64 @@ class Storage: return doc_path.version + 1 + def delete_pages(self, doc_path, page_numbers): + """ + Delets pages in the document pointed by doc_path. + doc_path is an instance of mglib.path.DocumentPath + + In case of success returns document's new version. + """ + + if not isinstance(page_numbers, list): + logger.error("Expecting list argument") + return False + + src_doc_path = doc_path + dst_doc_path = DocumentPath.copy_from( + src_doc_path, + version=doc_path.version + 1 + ) + self.make_sure_path_exists( + self.abspath(dst_doc_path) + ) + pdftk.delete_pages( + self.abspath(src_doc_path), + self.abspath(dst_doc_path), + page_numbers + ) + + page_count = self.get_pagecount(doc_path) + if len(page_numbers) > page_count: + logger.error( + f"deleted_pages({page_numbers}) > page_count({page_count})" + ) + return + + assigns = get_assigns_after_delete( + total_pages=page_count, + deleted_pages=page_numbers + ) + for a in assigns: + for step in Steps(): + src_page_path = PagePath( + document_path=src_doc_path, + page_num=a[1], + step=step, + page_count=page_count + ) + dst_page_path = PagePath( + document_path=dst_doc_path, + page_num=a[0], + step=step, + page_count=page_count - len(page_numbers) + ) + self.copy_page( + src_page_path=src_page_path, + dst_page_path=dst_page_path + ) + + return doc_path.version + 1 + def paste_pages( self, dest_doc_path, diff --git a/mglib/utils.py b/mglib/utils.py index ee80f70..6793cb5 100644 --- a/mglib/utils.py +++ b/mglib/utils.py @@ -37,3 +37,50 @@ def safe_to_delete(place): return True + +def get_assigns_after_delete(total_pages, deleted_pages): + """ + given total pages and a list of deleted pages - returns + a list of assignations of pages: + [new_version_page_num, old_version_page_num] + Example 1: + total_pages: 6 + deleted_pages: [1, 2] + returns: [ + [(1, 3), (2, 4), (3, 5), (4, 6)] + # page #1 gets info from prev page #3 + # page #2 ... #4 + ... + # page #4 ... #6 + ] + + Example 2: + total pages: 5 + deleted_pages [1, 5] + returns: [ + [(1, 2), (2, 3), (3, 4) + ] + + Example 3: + total pages: 5 + deleted_pages [2, 3] + returns: [ + [(1, 1), (2, 4), (3, 5) + # page #1 stays unaffected + # page #2 gets the info from page number 4 + # page #3 gets info from page #5 + ] + """ + if total_pages < len(deleted_pages): + err_msg = f"total_pages < deleted_pages" + raise ValueError(err_msg) + + # only numbers of pages which were not deleted + pages = [ + page for page in list(range(1, total_pages + 1)) + if page not in deleted_pages + ] + + page_numbers = range(1, len(pages) + 1) + + return list(zip(page_numbers, pages))