refactoring

pull/3/head
Eugen Ciur 2020-05-17 09:01:59 +02:00
parent bf5342724a
commit 8df81235ba
3 changed files with 65 additions and 80 deletions

View File

@ -18,50 +18,6 @@ OcrMigrate class takes care of this sort of txt/hocr files moves.
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def migrate_cutted_pages(dest_ep, src_doc_ep_list):
"""
dest_ep = destination document endpoint
src_doc_ep_list = a list of following format:
[
{
'doc_ep': doc_ep,
'page_nums': [page_num_1, page_num_2, page_num_3]
},
{
'doc_ep': doc_ep,
'page_nums': [page_num_1, page_num_2, page_num_3]
},
...
]
with a list of source document with copied pages.
"""
dest_page_num = 1
dest_page_count = sum([
len(item['page_nums']) for item in src_doc_ep_list
])
for item in src_doc_ep_list:
src_ep = item['doc_ep']
for page_num in item['page_nums']:
for step in Steps():
src_page_ep = PageEp(
document_ep=src_ep,
page_num=int(page_num),
step=step,
page_count=get_pagecount(src_ep)
)
dst_page_ep = PageEp(
document_ep=dest_ep,
page_num=dest_page_num,
step=step,
page_count=dest_page_count
)
logger.debug(f"src={src_page_ep} dst={dst_page_ep}")
copy_page(
src_page_ep=src_page_ep,
dst_page_ep=dst_page_ep
)
dest_page_num += 1
class OcrMigrate: class OcrMigrate:
""" """

View File

@ -134,12 +134,12 @@ def split_ranges(total, after=False, before=False):
def paste_pages_into_existing_doc( def paste_pages_into_existing_doc(
dest_doc_ep, dst,
src_doc_ep_list, data_list,
after_page_number=False, after_page_number=False,
before_page_number=False before_page_number=False
): ):
page_count = get_pagecount(dest_doc_ep.url()) page_count = get_pagecount(dst)
list1, list2 = split_ranges( list1, list2 = split_ranges(
total=page_count, total=page_count,
after=after_page_number, after=after_page_number,
@ -155,24 +155,22 @@ def paste_pages_into_existing_doc(
letters_pages_after = [] letters_pages_after = []
letters_2_doc_map.append( letters_2_doc_map.append(
f"A={dest_doc_ep.url()}" f"A={dst.url()}"
) )
for idx in range(0, len(src_doc_ep_list)): for idx in range(0, len(data_list)):
letter = letters[idx] letter = letters[idx]
doc_ep = src_doc_ep_list[idx]['doc_ep'] src = data_list[idx]['src']
pages = src_doc_ep_list[idx]['page_nums'] pages = data_list[idx]['page_nums']
letters_2_doc_map.append( letters_2_doc_map.append(
f"{letter}={doc_ep.url()}" f"{letter}={src}"
) )
for p in pages: for p in pages:
letters_pages.append( letters_pages.append(
f"{letter}{p}" f"{letter}{p}"
) )
dest_doc_ep.inc_version()
for p in list1: for p in list1:
letters_pages_before.append( letters_pages_before.append(
f"A{p}" f"A{p}"
@ -200,19 +198,15 @@ def paste_pages_into_existing_doc(
cmd.append("output") cmd.append("output")
make_sure_path_exists(dest_doc_ep.url()) cmd.append(dst)
cmd.append(dest_doc_ep.url())
run(cmd) run(cmd)
return dest_doc_ep.version
def paste_pages( def paste_pages(
dest_doc_ep, dst,
src_doc_ep_list, data_list,
dest_doc_is_new=True, dst_doc_is_new=True,
after_page_number=False, after_page_number=False,
before_page_number=False before_page_number=False
): ):
@ -234,12 +228,12 @@ def paste_pages(
src_doc_ep_list is a list of documents where pages src_doc_ep_list is a list of documents where pages
(with numbers page_num_1...) will be paste from. (with numbers page_num_1...) will be paste from.
dest_doc_is_new = True well.. destination document was just created, dst_doc_is_new = True well.. destination document was just created,
we are pasting here cutted pages into some folder as new document. we are pasting here cutted pages into some folder as new document.
In this case 'after' and 'before' arguments are ignored In this case 'after' and 'before' arguments are ignored
dest_doc_is_new = False, pasting pages into exiting document. dst_doc_is_new = False, pasting pages into exiting document.
If before_page_number > 0 - paste pages before page number If before_page_number > 0 - paste pages before page number
'before_page_number' 'before_page_number'
If after_page_number > 0 - paste pages after page number If after_page_number > 0 - paste pages after page number
@ -250,10 +244,10 @@ def paste_pages(
If both before_page_number and after_page_number are < 0 - just paste If both before_page_number and after_page_number are < 0 - just paste
pages at the end of the document. pages at the end of the document.
""" """
if not dest_doc_is_new: if not dst_doc_is_new:
return paste_pages_into_existing_doc( return paste_pages_into_existing_doc(
dest_doc_ep=dest_doc_ep, dst=dst,
src_doc_ep_list=src_doc_ep_list, data_list=data_list,
after_page_number=after_page_number, after_page_number=after_page_number,
before_page_number=before_page_number before_page_number=before_page_number
) )
@ -261,21 +255,19 @@ def paste_pages(
letters_2_doc_map = [] letters_2_doc_map = []
letters_pages = [] letters_pages = []
for idx in range(0, len(src_doc_ep_list)): for idx in range(0, len(data_list)):
letter = letters[idx] letter = letters[idx]
doc_ep = src_doc_ep_list[idx]['doc_ep'] src = data_list[idx]['src']
pages = src_doc_ep_list[idx]['page_nums'] pages = data_list[idx]['page_nums']
letters_2_doc_map.append( letters_2_doc_map.append(
f"{letter}={doc_ep.url()}" f"{letter}={src}"
) )
for p in pages: for p in pages:
letters_pages.append( letters_pages.append(
f"{letter}{p}" f"{letter}{p}"
) )
dest_doc_ep.inc_version()
cmd = [ cmd = [
"pdftk", "pdftk",
] ]
@ -288,14 +280,10 @@ def paste_pages(
cmd.append("output") cmd.append("output")
make_sure_path_exists(dest_doc_ep.url()) cmd.append(dst)
cmd.append(dest_doc_ep.url())
run(cmd) run(cmd)
return dest_doc_ep.version
def reorder_pages( def reorder_pages(
src, dst, new_order src, dst, new_order

View File

@ -307,7 +307,7 @@ class Storage:
def paste_pages( def paste_pages(
self, self,
dest_doc_path, dest_doc_path,
src_doc_path, data_list,
dest_doc_is_new=False, dest_doc_is_new=False,
after_page_number=False, after_page_number=False,
before_page_number=False before_page_number=False
@ -317,7 +317,48 @@ class Storage:
from src_doc_path. Both dest and src are instances of from src_doc_path. Both dest and src are instances of
mglib.path.DocumentPath mglib.path.DocumentPath
""" """
pass next_ver_dp = DocumentPath.copy_from(
dest_doc_path,
version=dest_doc_path.version + 1
)
self.make_sure_path_exists(
self.abspath(next_ver_dp)
)
pdftk.paste_pages(
dst=self.abspath(next_ver_dp),
data_list=data_list,
dst_doc_is_new=dest_doc_is_new,
after_page_number=after_page_number,
before_page_number=before_page_number
)
dest_page_num = 1
dest_page_count = sum([
len(item['page_nums']) for item in data_list
])
for item in data_list:
src_path = item['doc_path']
for page_num in item['page_nums']:
for step in Steps():
src_page_path = PagePath(
document_path=src_path,
page_num=int(page_num),
step=step,
page_count=self.get_pagecount(src_path)
)
dst_page_path = PagePath(
document_path=next_ver_dp,
page_num=dest_page_num,
step=step,
page_count=dest_page_count
)
logger.debug(f"src={src_page_path} dst={dst_page_path}")
self.copy_page(
src_page_path=src_page_path,
dst_page_path=dst_page_path
)
dest_page_num += 1
class FileSystemStorage(Storage): class FileSystemStorage(Storage):