2020-04-25 10:17:48 +02:00
|
|
|
import os
|
2020-05-04 17:09:02 +02:00
|
|
|
import logging
|
2020-06-22 08:35:39 +02:00
|
|
|
import importlib.machinery
|
|
|
|
import importlib.util
|
|
|
|
|
2020-05-04 17:09:02 +02:00
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
2020-07-14 09:01:46 +02:00
|
|
|
SAFE_EXTENSIONS = [
|
|
|
|
'.txt',
|
|
|
|
'.jpg',
|
|
|
|
'.jpeg',
|
|
|
|
'.png',
|
|
|
|
'.hocr',
|
|
|
|
'.pdf',
|
2020-07-16 15:02:20 +02:00
|
|
|
'.tiff'
|
2020-07-14 09:01:46 +02:00
|
|
|
]
|
2020-04-25 10:17:48 +02:00
|
|
|
|
|
|
|
|
|
|
|
def get_bool(key, default="NO"):
|
|
|
|
"""
|
|
|
|
Returns True if environment variable named KEY is one of
|
|
|
|
"yes", "y", "t", "true" (lowercase of uppercase)
|
|
|
|
|
|
|
|
otherwise returns False
|
|
|
|
"""
|
|
|
|
env_var_value = os.getenv(key, default).lower()
|
|
|
|
YES_VALUES = ("yes", "y", "1", "t", "true")
|
|
|
|
if env_var_value in YES_VALUES:
|
|
|
|
return True
|
|
|
|
|
|
|
|
return False
|
2020-05-04 17:09:02 +02:00
|
|
|
|
|
|
|
|
|
|
|
def safe_to_delete(place):
|
|
|
|
if not os.path.exists(place):
|
|
|
|
logging.warning(
|
|
|
|
f"Trying to delete not exising folder"
|
|
|
|
f" {place}"
|
|
|
|
)
|
|
|
|
return False
|
|
|
|
|
|
|
|
for root, dirs, files in os.walk(place):
|
|
|
|
for name in files:
|
|
|
|
base, ext = os.path.splitext(name)
|
|
|
|
if ext not in SAFE_EXTENSIONS:
|
2020-08-17 11:24:16 +02:00
|
|
|
logger.warning(
|
|
|
|
f"Trying to delete unsefe location: "
|
|
|
|
f"extention={ext} not found in {SAFE_EXTENSIONS}"
|
|
|
|
)
|
|
|
|
return False
|
2020-05-04 17:09:02 +02:00
|
|
|
|
|
|
|
return True
|
|
|
|
|
2020-05-17 07:26:38 +02:00
|
|
|
|
|
|
|
def get_assigns_after_delete(total_pages, deleted_pages):
|
|
|
|
"""
|
|
|
|
given total pages and a list of deleted pages - returns
|
|
|
|
a list of assignations of pages:
|
|
|
|
[new_version_page_num, old_version_page_num]
|
|
|
|
Example 1:
|
|
|
|
total_pages: 6
|
|
|
|
deleted_pages: [1, 2]
|
|
|
|
returns: [
|
|
|
|
[(1, 3), (2, 4), (3, 5), (4, 6)]
|
|
|
|
# page #1 gets info from prev page #3
|
|
|
|
# page #2 ... #4
|
|
|
|
...
|
|
|
|
# page #4 ... #6
|
|
|
|
]
|
|
|
|
|
|
|
|
Example 2:
|
|
|
|
total pages: 5
|
|
|
|
deleted_pages [1, 5]
|
|
|
|
returns: [
|
|
|
|
[(1, 2), (2, 3), (3, 4)
|
|
|
|
]
|
|
|
|
|
|
|
|
Example 3:
|
|
|
|
total pages: 5
|
|
|
|
deleted_pages [2, 3]
|
|
|
|
returns: [
|
|
|
|
[(1, 1), (2, 4), (3, 5)
|
|
|
|
# page #1 stays unaffected
|
|
|
|
# page #2 gets the info from page number 4
|
|
|
|
# page #3 gets info from page #5
|
|
|
|
]
|
|
|
|
"""
|
|
|
|
if total_pages < len(deleted_pages):
|
2020-11-29 08:54:51 +01:00
|
|
|
err_msg = "total_pages < deleted_pages"
|
2020-05-17 07:26:38 +02:00
|
|
|
raise ValueError(err_msg)
|
|
|
|
|
|
|
|
# only numbers of pages which were not deleted
|
|
|
|
pages = [
|
|
|
|
page for page in list(range(1, total_pages + 1))
|
|
|
|
if page not in deleted_pages
|
|
|
|
]
|
|
|
|
|
|
|
|
page_numbers = range(1, len(pages) + 1)
|
|
|
|
|
|
|
|
return list(zip(page_numbers, pages))
|
2020-06-22 08:35:39 +02:00
|
|
|
|
|
|
|
|
|
|
|
def load_config(config_file):
|
|
|
|
|
|
|
|
loader_ = importlib.machinery.SourceFileLoader(
|
|
|
|
"config",
|
|
|
|
config_file
|
|
|
|
)
|
|
|
|
spec = importlib.util.spec_from_file_location(
|
|
|
|
"config", config_file, loader=loader_
|
|
|
|
)
|
|
|
|
mod = importlib.util.module_from_spec(spec)
|
|
|
|
spec.loader.exec_module(mod)
|
|
|
|
# and stop looking for ther configs.
|
|
|
|
cfg_papermerge = vars(mod)
|
|
|
|
cfg_file_found = True
|
|
|
|
|
|
|
|
return cfg_papermerge, cfg_file_found
|
|
|
|
|
|
|
|
|
|
|
|
def try_load_config(config_locations, config_env_var_name):
|
|
|
|
|
|
|
|
cfg_file_found = False
|
|
|
|
cfg_papermerge = {}
|
|
|
|
|
|
|
|
for config_file in config_locations:
|
|
|
|
if os.path.exists(config_file):
|
|
|
|
cfg_papermerge, cfg_file_found = load_config(config_file)
|
|
|
|
if cfg_file_found:
|
|
|
|
break
|
|
|
|
|
|
|
|
if not cfg_file_found:
|
|
|
|
config_file = os.environ.get(config_env_var_name, False)
|
|
|
|
if config_file:
|
|
|
|
try:
|
|
|
|
cfg_papermerge, cfg_file_found = load_config(config_file)
|
|
|
|
except FileNotFoundError:
|
2020-11-29 08:54:51 +01:00
|
|
|
err_msg = "Failed attempted to read" +\
|
2020-06-22 08:35:39 +02:00
|
|
|
f" configuration file '{config_file}'" +\
|
|
|
|
f" pointed by environment variable '{config_env_var_name}'"
|
|
|
|
raise FileNotFoundError(err_msg)
|
|
|
|
|
|
|
|
return cfg_papermerge
|