mirror of https://github.com/papermerge/mglib
refactoring
parent
bf5342724a
commit
8df81235ba
|
@ -18,50 +18,6 @@ OcrMigrate class takes care of this sort of txt/hocr files moves.
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
def migrate_cutted_pages(dest_ep, src_doc_ep_list):
|
|
||||||
"""
|
|
||||||
dest_ep = destination document endpoint
|
|
||||||
src_doc_ep_list = a list of following format:
|
|
||||||
[
|
|
||||||
{
|
|
||||||
'doc_ep': doc_ep,
|
|
||||||
'page_nums': [page_num_1, page_num_2, page_num_3]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'doc_ep': doc_ep,
|
|
||||||
'page_nums': [page_num_1, page_num_2, page_num_3]
|
|
||||||
},
|
|
||||||
...
|
|
||||||
]
|
|
||||||
with a list of source documents with copied pages.
|
|
||||||
"""
|
|
||||||
dest_page_num = 1
|
|
||||||
dest_page_count = sum([
|
|
||||||
len(item['page_nums']) for item in src_doc_ep_list
|
|
||||||
])
|
|
||||||
for item in src_doc_ep_list:
|
|
||||||
src_ep = item['doc_ep']
|
|
||||||
for page_num in item['page_nums']:
|
|
||||||
for step in Steps():
|
|
||||||
src_page_ep = PageEp(
|
|
||||||
document_ep=src_ep,
|
|
||||||
page_num=int(page_num),
|
|
||||||
step=step,
|
|
||||||
page_count=get_pagecount(src_ep)
|
|
||||||
)
|
|
||||||
dst_page_ep = PageEp(
|
|
||||||
document_ep=dest_ep,
|
|
||||||
page_num=dest_page_num,
|
|
||||||
step=step,
|
|
||||||
page_count=dest_page_count
|
|
||||||
)
|
|
||||||
logger.debug(f"src={src_page_ep} dst={dst_page_ep}")
|
|
||||||
copy_page(
|
|
||||||
src_page_ep=src_page_ep,
|
|
||||||
dst_page_ep=dst_page_ep
|
|
||||||
)
|
|
||||||
dest_page_num += 1
|
|
||||||
|
|
||||||
|
|
||||||
class OcrMigrate:
|
class OcrMigrate:
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -134,12 +134,12 @@ def split_ranges(total, after=False, before=False):
|
||||||
|
|
||||||
|
|
||||||
def paste_pages_into_existing_doc(
|
def paste_pages_into_existing_doc(
|
||||||
dest_doc_ep,
|
dst,
|
||||||
src_doc_ep_list,
|
data_list,
|
||||||
after_page_number=False,
|
after_page_number=False,
|
||||||
before_page_number=False
|
before_page_number=False
|
||||||
):
|
):
|
||||||
page_count = get_pagecount(dest_doc_ep.url())
|
page_count = get_pagecount(dst)
|
||||||
list1, list2 = split_ranges(
|
list1, list2 = split_ranges(
|
||||||
total=page_count,
|
total=page_count,
|
||||||
after=after_page_number,
|
after=after_page_number,
|
||||||
|
@ -155,24 +155,22 @@ def paste_pages_into_existing_doc(
|
||||||
letters_pages_after = []
|
letters_pages_after = []
|
||||||
|
|
||||||
letters_2_doc_map.append(
|
letters_2_doc_map.append(
|
||||||
f"A={dest_doc_ep.url()}"
|
f"A={dst.url()}"
|
||||||
)
|
)
|
||||||
|
|
||||||
for idx in range(0, len(src_doc_ep_list)):
|
for idx in range(0, len(data_list)):
|
||||||
letter = letters[idx]
|
letter = letters[idx]
|
||||||
doc_ep = src_doc_ep_list[idx]['doc_ep']
|
src = data_list[idx]['src']
|
||||||
pages = src_doc_ep_list[idx]['page_nums']
|
pages = data_list[idx]['page_nums']
|
||||||
|
|
||||||
letters_2_doc_map.append(
|
letters_2_doc_map.append(
|
||||||
f"{letter}={doc_ep.url()}"
|
f"{letter}={src}"
|
||||||
)
|
)
|
||||||
for p in pages:
|
for p in pages:
|
||||||
letters_pages.append(
|
letters_pages.append(
|
||||||
f"{letter}{p}"
|
f"{letter}{p}"
|
||||||
)
|
)
|
||||||
|
|
||||||
dest_doc_ep.inc_version()
|
|
||||||
|
|
||||||
for p in list1:
|
for p in list1:
|
||||||
letters_pages_before.append(
|
letters_pages_before.append(
|
||||||
f"A{p}"
|
f"A{p}"
|
||||||
|
@ -200,19 +198,15 @@ def paste_pages_into_existing_doc(
|
||||||
|
|
||||||
cmd.append("output")
|
cmd.append("output")
|
||||||
|
|
||||||
make_sure_path_exists(dest_doc_ep.url())
|
cmd.append(dst)
|
||||||
|
|
||||||
cmd.append(dest_doc_ep.url())
|
|
||||||
|
|
||||||
run(cmd)
|
run(cmd)
|
||||||
|
|
||||||
return dest_doc_ep.version
|
|
||||||
|
|
||||||
|
|
||||||
def paste_pages(
|
def paste_pages(
|
||||||
dest_doc_ep,
|
dst,
|
||||||
src_doc_ep_list,
|
data_list,
|
||||||
dest_doc_is_new=True,
|
dst_doc_is_new=True,
|
||||||
after_page_number=False,
|
after_page_number=False,
|
||||||
before_page_number=False
|
before_page_number=False
|
||||||
):
|
):
|
||||||
|
@ -234,12 +228,12 @@ def paste_pages(
|
||||||
src_doc_ep_list is a list of documents where pages
|
src_doc_ep_list is a list of documents where pages
|
||||||
(with numbers page_num_1...) will be pasted from.
|
(with numbers page_num_1...) will be pasted from.
|
||||||
|
|
||||||
dest_doc_is_new = True well.. destination document was just created,
|
dst_doc_is_new = True well.. destination document was just created,
|
||||||
here we are pasting cut pages into some folder as a new document.
|
here we are pasting cut pages into some folder as a new document.
|
||||||
|
|
||||||
In this case 'after' and 'before' arguments are ignored
|
In this case 'after' and 'before' arguments are ignored
|
||||||
|
|
||||||
dest_doc_is_new = False, pasting pages into existing document.
|
dst_doc_is_new = False, pasting pages into existing document.
|
||||||
If before_page_number > 0 - paste pages before page number
|
If before_page_number > 0 - paste pages before page number
|
||||||
'before_page_number'
|
'before_page_number'
|
||||||
If after_page_number > 0 - paste pages after page number
|
If after_page_number > 0 - paste pages after page number
|
||||||
|
@ -250,10 +244,10 @@ def paste_pages(
|
||||||
If both before_page_number and after_page_number are < 0 - just paste
|
If both before_page_number and after_page_number are < 0 - just paste
|
||||||
pages at the end of the document.
|
pages at the end of the document.
|
||||||
"""
|
"""
|
||||||
if not dest_doc_is_new:
|
if not dst_doc_is_new:
|
||||||
return paste_pages_into_existing_doc(
|
return paste_pages_into_existing_doc(
|
||||||
dest_doc_ep=dest_doc_ep,
|
dst=dst,
|
||||||
src_doc_ep_list=src_doc_ep_list,
|
data_list=data_list,
|
||||||
after_page_number=after_page_number,
|
after_page_number=after_page_number,
|
||||||
before_page_number=before_page_number
|
before_page_number=before_page_number
|
||||||
)
|
)
|
||||||
|
@ -261,21 +255,19 @@ def paste_pages(
|
||||||
letters_2_doc_map = []
|
letters_2_doc_map = []
|
||||||
letters_pages = []
|
letters_pages = []
|
||||||
|
|
||||||
for idx in range(0, len(src_doc_ep_list)):
|
for idx in range(0, len(data_list)):
|
||||||
letter = letters[idx]
|
letter = letters[idx]
|
||||||
doc_ep = src_doc_ep_list[idx]['doc_ep']
|
src = data_list[idx]['src']
|
||||||
pages = src_doc_ep_list[idx]['page_nums']
|
pages = data_list[idx]['page_nums']
|
||||||
|
|
||||||
letters_2_doc_map.append(
|
letters_2_doc_map.append(
|
||||||
f"{letter}={doc_ep.url()}"
|
f"{letter}={src}"
|
||||||
)
|
)
|
||||||
for p in pages:
|
for p in pages:
|
||||||
letters_pages.append(
|
letters_pages.append(
|
||||||
f"{letter}{p}"
|
f"{letter}{p}"
|
||||||
)
|
)
|
||||||
|
|
||||||
dest_doc_ep.inc_version()
|
|
||||||
|
|
||||||
cmd = [
|
cmd = [
|
||||||
"pdftk",
|
"pdftk",
|
||||||
]
|
]
|
||||||
|
@ -288,14 +280,10 @@ def paste_pages(
|
||||||
|
|
||||||
cmd.append("output")
|
cmd.append("output")
|
||||||
|
|
||||||
make_sure_path_exists(dest_doc_ep.url())
|
cmd.append(dst)
|
||||||
|
|
||||||
cmd.append(dest_doc_ep.url())
|
|
||||||
|
|
||||||
run(cmd)
|
run(cmd)
|
||||||
|
|
||||||
return dest_doc_ep.version
|
|
||||||
|
|
||||||
|
|
||||||
def reorder_pages(
|
def reorder_pages(
|
||||||
src, dst, new_order
|
src, dst, new_order
|
||||||
|
|
|
@ -307,7 +307,7 @@ class Storage:
|
||||||
def paste_pages(
|
def paste_pages(
|
||||||
self,
|
self,
|
||||||
dest_doc_path,
|
dest_doc_path,
|
||||||
src_doc_path,
|
data_list,
|
||||||
dest_doc_is_new=False,
|
dest_doc_is_new=False,
|
||||||
after_page_number=False,
|
after_page_number=False,
|
||||||
before_page_number=False
|
before_page_number=False
|
||||||
|
@ -317,7 +317,48 @@ class Storage:
|
||||||
from src_doc_path. Both dest and src are instances of
|
from src_doc_path. Both dest and src are instances of
|
||||||
mglib.path.DocumentPath
|
mglib.path.DocumentPath
|
||||||
"""
|
"""
|
||||||
pass
|
next_ver_dp = DocumentPath.copy_from(
|
||||||
|
dest_doc_path,
|
||||||
|
version=dest_doc_path.version + 1
|
||||||
|
)
|
||||||
|
self.make_sure_path_exists(
|
||||||
|
self.abspath(next_ver_dp)
|
||||||
|
)
|
||||||
|
|
||||||
|
pdftk.paste_pages(
|
||||||
|
dst=self.abspath(next_ver_dp),
|
||||||
|
data_list=data_list,
|
||||||
|
dst_doc_is_new=dest_doc_is_new,
|
||||||
|
after_page_number=after_page_number,
|
||||||
|
before_page_number=before_page_number
|
||||||
|
)
|
||||||
|
|
||||||
|
dest_page_num = 1
|
||||||
|
dest_page_count = sum([
|
||||||
|
len(item['page_nums']) for item in data_list
|
||||||
|
])
|
||||||
|
for item in data_list:
|
||||||
|
src_path = item['doc_path']
|
||||||
|
for page_num in item['page_nums']:
|
||||||
|
for step in Steps():
|
||||||
|
src_page_path = PagePath(
|
||||||
|
document_path=src_path,
|
||||||
|
page_num=int(page_num),
|
||||||
|
step=step,
|
||||||
|
page_count=self.get_pagecount(src_path)
|
||||||
|
)
|
||||||
|
dst_page_path = PagePath(
|
||||||
|
document_path=next_ver_dp,
|
||||||
|
page_num=dest_page_num,
|
||||||
|
step=step,
|
||||||
|
page_count=dest_page_count
|
||||||
|
)
|
||||||
|
logger.debug(f"src={src_page_path} dst={dst_page_path}")
|
||||||
|
self.copy_page(
|
||||||
|
src_page_path=src_page_path,
|
||||||
|
dst_page_path=dst_page_path
|
||||||
|
)
|
||||||
|
dest_page_num += 1
|
||||||
|
|
||||||
|
|
||||||
class FileSystemStorage(Storage):
|
class FileSystemStorage(Storage):
|
||||||
|
|
Loading…
Reference in New Issue