refactoring

pull/3/head
Eugen Ciur 2020-05-17 09:01:59 +02:00
parent bf5342724a
commit 8df81235ba
3 changed files with 65 additions and 80 deletions

View File

@ -18,50 +18,6 @@ OcrMigrate class takes care of this sort of txt/hocr files moves.
logger = logging.getLogger(__name__)
def migrate_cutted_pages(dest_ep, src_doc_ep_list):
    """
    Copy the per-step page data of cut pages into the destination document.

    dest_ep = destination document endpoint
    src_doc_ep_list = a list of the following format:
        [
            {
                'doc_ep': doc_ep,
                'page_nums': [page_num_1, page_num_2, page_num_3]
            },
            {
                'doc_ep': doc_ep,
                'page_nums': [page_num_1, page_num_2, page_num_3]
            },
            ...
        ]
    i.e. a list of source documents, each with the numbers of the pages
    taken from it.

    Destination pages are numbered consecutively starting from 1, in the
    order in which the pages appear in src_doc_ep_list. For every page,
    copy_page is called once per step yielded by Steps().
    """
    # Total number of pages the destination document will contain.
    dest_page_count = sum(
        len(item['page_nums']) for item in src_doc_ep_list
    )
    dest_page_num = 1

    for item in src_doc_ep_list:
        src_ep = item['doc_ep']
        page_nums = item['page_nums']
        if not page_nums:
            # Nothing is copied from this document, so (as before) its
            # page count is never looked up.
            continue

        # The page count is a property of the source document only, so it
        # is looked up once per document instead of once per page and step.
        # NOTE(review): assumes copy_page does not alter the source
        # document's page count - confirm.
        src_page_count = get_pagecount(src_ep)

        for page_num in page_nums:
            for step in Steps():
                src_page_ep = PageEp(
                    document_ep=src_ep,
                    page_num=int(page_num),
                    step=step,
                    page_count=src_page_count
                )
                dst_page_ep = PageEp(
                    document_ep=dest_ep,
                    page_num=dest_page_num,
                    step=step,
                    page_count=dest_page_count
                )
                # Lazy %-formatting: the endpoints are only rendered when
                # debug logging is enabled.
                logger.debug("src=%s dst=%s", src_page_ep, dst_page_ep)
                copy_page(
                    src_page_ep=src_page_ep,
                    dst_page_ep=dst_page_ep
                )
            # NOTE(review): advanced once per page, after all of its steps
            # were copied; this is the reading consistent with
            # dest_page_count being the total number of pages - confirm
            # against the original indentation.
            dest_page_num += 1
class OcrMigrate:
"""

View File

@ -134,12 +134,12 @@ def split_ranges(total, after=False, before=False):
def paste_pages_into_existing_doc(
dest_doc_ep,
src_doc_ep_list,
dst,
data_list,
after_page_number=False,
before_page_number=False
):
page_count = get_pagecount(dest_doc_ep.url())
page_count = get_pagecount(dst)
list1, list2 = split_ranges(
total=page_count,
after=after_page_number,
@ -155,24 +155,22 @@ def paste_pages_into_existing_doc(
letters_pages_after = []
letters_2_doc_map.append(
f"A={dest_doc_ep.url()}"
f"A={dst.url()}"
)
for idx in range(0, len(src_doc_ep_list)):
for idx in range(0, len(data_list)):
letter = letters[idx]
doc_ep = src_doc_ep_list[idx]['doc_ep']
pages = src_doc_ep_list[idx]['page_nums']
src = data_list[idx]['src']
pages = data_list[idx]['page_nums']
letters_2_doc_map.append(
f"{letter}={doc_ep.url()}"
f"{letter}={src}"
)
for p in pages:
letters_pages.append(
f"{letter}{p}"
)
dest_doc_ep.inc_version()
for p in list1:
letters_pages_before.append(
f"A{p}"
@ -200,19 +198,15 @@ def paste_pages_into_existing_doc(
cmd.append("output")
make_sure_path_exists(dest_doc_ep.url())
cmd.append(dest_doc_ep.url())
cmd.append(dst)
run(cmd)
return dest_doc_ep.version
def paste_pages(
dest_doc_ep,
src_doc_ep_list,
dest_doc_is_new=True,
dst,
data_list,
dst_doc_is_new=True,
after_page_number=False,
before_page_number=False
):
@ -234,12 +228,12 @@ def paste_pages(
data_list is a list of documents from which pages
(with numbers page_num_1...) will be pasted.
dest_doc_is_new = True well.. destination document was just created,
dst_doc_is_new = True well.. destination document was just created,
we are pasting cut pages here into some folder as a new document.
In this case 'after' and 'before' arguments are ignored
dest_doc_is_new = False, pasting pages into exiting document.
dst_doc_is_new = False, pasting pages into existing document.
If before_page_number > 0 - paste pages before page number
'before_page_number'
If after_page_number > 0 - paste pages after page number
@ -250,10 +244,10 @@ def paste_pages(
If both before_page_number and after_page_number are < 0 - just paste
pages at the end of the document.
"""
if not dest_doc_is_new:
if not dst_doc_is_new:
return paste_pages_into_existing_doc(
dest_doc_ep=dest_doc_ep,
src_doc_ep_list=src_doc_ep_list,
dst=dst,
data_list=data_list,
after_page_number=after_page_number,
before_page_number=before_page_number
)
@ -261,21 +255,19 @@ def paste_pages(
letters_2_doc_map = []
letters_pages = []
for idx in range(0, len(src_doc_ep_list)):
for idx in range(0, len(data_list)):
letter = letters[idx]
doc_ep = src_doc_ep_list[idx]['doc_ep']
pages = src_doc_ep_list[idx]['page_nums']
src = data_list[idx]['src']
pages = data_list[idx]['page_nums']
letters_2_doc_map.append(
f"{letter}={doc_ep.url()}"
f"{letter}={src}"
)
for p in pages:
letters_pages.append(
f"{letter}{p}"
)
dest_doc_ep.inc_version()
cmd = [
"pdftk",
]
@ -288,14 +280,10 @@ def paste_pages(
cmd.append("output")
make_sure_path_exists(dest_doc_ep.url())
cmd.append(dest_doc_ep.url())
cmd.append(dst)
run(cmd)
return dest_doc_ep.version
def reorder_pages(
src, dst, new_order

View File

@ -307,7 +307,7 @@ class Storage:
def paste_pages(
self,
dest_doc_path,
src_doc_path,
data_list,
dest_doc_is_new=False,
after_page_number=False,
before_page_number=False
@ -317,7 +317,48 @@ class Storage:
from src_doc_path. Both dest and src are instances of
mglib.path.DocumentPath
"""
pass
next_ver_dp = DocumentPath.copy_from(
dest_doc_path,
version=dest_doc_path.version + 1
)
self.make_sure_path_exists(
self.abspath(next_ver_dp)
)
pdftk.paste_pages(
dst=self.abspath(next_ver_dp),
data_list=data_list,
dst_doc_is_new=dest_doc_is_new,
after_page_number=after_page_number,
before_page_number=before_page_number
)
dest_page_num = 1
dest_page_count = sum([
len(item['page_nums']) for item in data_list
])
for item in data_list:
src_path = item['doc_path']
for page_num in item['page_nums']:
for step in Steps():
src_page_path = PagePath(
document_path=src_path,
page_num=int(page_num),
step=step,
page_count=self.get_pagecount(src_path)
)
dst_page_path = PagePath(
document_path=next_ver_dp,
page_num=dest_page_num,
step=step,
page_count=dest_page_count
)
logger.debug(f"src={src_page_path} dst={dst_page_path}")
self.copy_page(
src_page_path=src_page_path,
dst_page_path=dst_page_path
)
dest_page_num += 1
class FileSystemStorage(Storage):