mirror of https://github.com/papermerge/mglib
refactoring
parent
bf5342724a
commit
8df81235ba
|
@ -18,50 +18,6 @@ OcrMigrate class takes care of this sort of txt/hocr files moves.
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def migrate_cutted_pages(dest_ep, src_doc_ep_list):
    """
    Copy the per-step page data of cut pages into the destination
    document.

    dest_ep = destination document endpoint
    src_doc_ep_list = a list of the following format:
        [
            {
                'doc_ep': doc_ep,
                'page_nums': [page_num_1, page_num_2, page_num_3]
            },
            {
                'doc_ep': doc_ep,
                'page_nums': [page_num_1, page_num_2, page_num_3]
            },
            ...
        ]
    i.e. one entry per source document, listing the pages taken from it.

    Destination pages are numbered consecutively starting at 1, in the
    order in which the pages appear in src_doc_ep_list. The destination
    page count is the total number of listed pages. For every page and
    for every step yielded by Steps(), copy_page() is called with the
    matching source and destination page endpoints.

    Returns None.
    """
    dest_page_num = 1
    dest_page_count = sum(
        len(item['page_nums']) for item in src_doc_ep_list
    )

    for item in src_doc_ep_list:
        page_nums = item['page_nums']
        if not page_nums:
            # Nothing to copy from this document; also avoids a page
            # count lookup that would never have been used.
            continue

        src_ep = item['doc_ep']
        # The page count depends only on the source document, so it is
        # looked up once per document, not once per (page, step) pair.
        src_page_count = get_pagecount(src_ep)

        for page_num in page_nums:
            for step in Steps():
                src_page_ep = PageEp(
                    document_ep=src_ep,
                    page_num=int(page_num),
                    step=step,
                    page_count=src_page_count
                )
                dst_page_ep = PageEp(
                    document_ep=dest_ep,
                    page_num=dest_page_num,
                    step=step,
                    page_count=dest_page_count
                )
                logger.debug(f"src={src_page_ep} dst={dst_page_ep}")
                copy_page(
                    src_page_ep=src_page_ep,
                    dst_page_ep=dst_page_ep
                )
            # All steps of one source page share the same destination
            # page number; advance only after the last step.
            dest_page_num += 1
|
||||
|
||||
|
||||
class OcrMigrate:
|
||||
"""
|
||||
|
|
|
@ -134,12 +134,12 @@ def split_ranges(total, after=False, before=False):
|
|||
|
||||
|
||||
def paste_pages_into_existing_doc(
|
||||
dest_doc_ep,
|
||||
src_doc_ep_list,
|
||||
dst,
|
||||
data_list,
|
||||
after_page_number=False,
|
||||
before_page_number=False
|
||||
):
|
||||
page_count = get_pagecount(dest_doc_ep.url())
|
||||
page_count = get_pagecount(dst)
|
||||
list1, list2 = split_ranges(
|
||||
total=page_count,
|
||||
after=after_page_number,
|
||||
|
@ -155,24 +155,22 @@ def paste_pages_into_existing_doc(
|
|||
letters_pages_after = []
|
||||
|
||||
letters_2_doc_map.append(
|
||||
f"A={dest_doc_ep.url()}"
|
||||
f"A={dst.url()}"
|
||||
)
|
||||
|
||||
for idx in range(0, len(src_doc_ep_list)):
|
||||
for idx in range(0, len(data_list)):
|
||||
letter = letters[idx]
|
||||
doc_ep = src_doc_ep_list[idx]['doc_ep']
|
||||
pages = src_doc_ep_list[idx]['page_nums']
|
||||
src = data_list[idx]['src']
|
||||
pages = data_list[idx]['page_nums']
|
||||
|
||||
letters_2_doc_map.append(
|
||||
f"{letter}={doc_ep.url()}"
|
||||
f"{letter}={src}"
|
||||
)
|
||||
for p in pages:
|
||||
letters_pages.append(
|
||||
f"{letter}{p}"
|
||||
)
|
||||
|
||||
dest_doc_ep.inc_version()
|
||||
|
||||
for p in list1:
|
||||
letters_pages_before.append(
|
||||
f"A{p}"
|
||||
|
@ -200,19 +198,15 @@ def paste_pages_into_existing_doc(
|
|||
|
||||
cmd.append("output")
|
||||
|
||||
make_sure_path_exists(dest_doc_ep.url())
|
||||
|
||||
cmd.append(dest_doc_ep.url())
|
||||
cmd.append(dst)
|
||||
|
||||
run(cmd)
|
||||
|
||||
return dest_doc_ep.version
|
||||
|
||||
|
||||
def paste_pages(
|
||||
dest_doc_ep,
|
||||
src_doc_ep_list,
|
||||
dest_doc_is_new=True,
|
||||
dst,
|
||||
data_list,
|
||||
dst_doc_is_new=True,
|
||||
after_page_number=False,
|
||||
before_page_number=False
|
||||
):
|
||||
|
@ -234,12 +228,12 @@ def paste_pages(
|
|||
src_doc_ep_list is a list of documents where pages
|
||||
(with numbers page_num_1...) will be paste from.
|
||||
|
||||
dest_doc_is_new = True well.. destination document was just created,
|
||||
dst_doc_is_new = True well.. destination document was just created,
|
||||
we are pasting here cutted pages into some folder as new document.
|
||||
|
||||
In this case 'after' and 'before' arguments are ignored
|
||||
|
||||
dest_doc_is_new = False, pasting pages into exiting document.
|
||||
dst_doc_is_new = False, pasting pages into exiting document.
|
||||
If before_page_number > 0 - paste pages before page number
|
||||
'before_page_number'
|
||||
If after_page_number > 0 - paste pages after page number
|
||||
|
@ -250,10 +244,10 @@ def paste_pages(
|
|||
If both before_page_number and after_page_number are < 0 - just paste
|
||||
pages at the end of the document.
|
||||
"""
|
||||
if not dest_doc_is_new:
|
||||
if not dst_doc_is_new:
|
||||
return paste_pages_into_existing_doc(
|
||||
dest_doc_ep=dest_doc_ep,
|
||||
src_doc_ep_list=src_doc_ep_list,
|
||||
dst=dst,
|
||||
data_list=data_list,
|
||||
after_page_number=after_page_number,
|
||||
before_page_number=before_page_number
|
||||
)
|
||||
|
@ -261,21 +255,19 @@ def paste_pages(
|
|||
letters_2_doc_map = []
|
||||
letters_pages = []
|
||||
|
||||
for idx in range(0, len(src_doc_ep_list)):
|
||||
for idx in range(0, len(data_list)):
|
||||
letter = letters[idx]
|
||||
doc_ep = src_doc_ep_list[idx]['doc_ep']
|
||||
pages = src_doc_ep_list[idx]['page_nums']
|
||||
src = data_list[idx]['src']
|
||||
pages = data_list[idx]['page_nums']
|
||||
|
||||
letters_2_doc_map.append(
|
||||
f"{letter}={doc_ep.url()}"
|
||||
f"{letter}={src}"
|
||||
)
|
||||
for p in pages:
|
||||
letters_pages.append(
|
||||
f"{letter}{p}"
|
||||
)
|
||||
|
||||
dest_doc_ep.inc_version()
|
||||
|
||||
cmd = [
|
||||
"pdftk",
|
||||
]
|
||||
|
@ -288,14 +280,10 @@ def paste_pages(
|
|||
|
||||
cmd.append("output")
|
||||
|
||||
make_sure_path_exists(dest_doc_ep.url())
|
||||
|
||||
cmd.append(dest_doc_ep.url())
|
||||
cmd.append(dst)
|
||||
|
||||
run(cmd)
|
||||
|
||||
return dest_doc_ep.version
|
||||
|
||||
|
||||
def reorder_pages(
|
||||
src, dst, new_order
|
||||
|
|
|
@ -307,7 +307,7 @@ class Storage:
|
|||
def paste_pages(
|
||||
self,
|
||||
dest_doc_path,
|
||||
src_doc_path,
|
||||
data_list,
|
||||
dest_doc_is_new=False,
|
||||
after_page_number=False,
|
||||
before_page_number=False
|
||||
|
@ -317,7 +317,48 @@ class Storage:
|
|||
from src_doc_path. Both dest and src are instances of
|
||||
mglib.path.DocumentPath
|
||||
"""
|
||||
pass
|
||||
next_ver_dp = DocumentPath.copy_from(
|
||||
dest_doc_path,
|
||||
version=dest_doc_path.version + 1
|
||||
)
|
||||
self.make_sure_path_exists(
|
||||
self.abspath(next_ver_dp)
|
||||
)
|
||||
|
||||
pdftk.paste_pages(
|
||||
dst=self.abspath(next_ver_dp),
|
||||
data_list=data_list,
|
||||
dst_doc_is_new=dest_doc_is_new,
|
||||
after_page_number=after_page_number,
|
||||
before_page_number=before_page_number
|
||||
)
|
||||
|
||||
dest_page_num = 1
|
||||
dest_page_count = sum([
|
||||
len(item['page_nums']) for item in data_list
|
||||
])
|
||||
for item in data_list:
|
||||
src_path = item['doc_path']
|
||||
for page_num in item['page_nums']:
|
||||
for step in Steps():
|
||||
src_page_path = PagePath(
|
||||
document_path=src_path,
|
||||
page_num=int(page_num),
|
||||
step=step,
|
||||
page_count=self.get_pagecount(src_path)
|
||||
)
|
||||
dst_page_path = PagePath(
|
||||
document_path=next_ver_dp,
|
||||
page_num=dest_page_num,
|
||||
step=step,
|
||||
page_count=dest_page_count
|
||||
)
|
||||
logger.debug(f"src={src_page_path} dst={dst_page_path}")
|
||||
self.copy_page(
|
||||
src_page_path=src_page_path,
|
||||
dst_page_path=dst_page_path
|
||||
)
|
||||
dest_page_num += 1
|
||||
|
||||
|
||||
class FileSystemStorage(Storage):
|
||||
|
|
Loading…
Reference in New Issue