class Mime(wrapper.Wrapper):
    """Guess the MIME type of a file with the ``file`` command line tool.

    The command assembled by :meth:`get_cmd` is
    ``file --mime-type -b <filepath>``. :meth:`guess` runs it and returns
    the stripped standard output, which the ``is_*`` helpers compare
    against known MIME type strings. Every ``is_*`` call invokes
    :meth:`guess` again, i.e. runs the command once more.
    """

    # NOTE(review): this module imports ``wrapper`` from ``pmworker`` while
    # the same change introduces ``mglib/wrapper.py``; presumably the import
    # should become ``from mglib import wrapper`` - confirm.

    # MIME types accepted by is_image(). ``file`` reports JPEG images as
    # ``image/jpeg``; the non-standard ``image/jpg`` spelling is kept only
    # so that nothing accepted before this fix is rejected now.
    IMAGE_MIME_TYPES = ('image/png', 'image/jpeg', 'image/jpg')

    def __init__(self, filepath):
        """Remember ``filepath`` and bind the wrapper to the ``file`` tool.

        :param filepath: path of the file whose MIME type will be guessed.
        """
        super().__init__(exec_name="file")
        self.filepath = filepath

    def get_cmd(self):
        """Return the argument list ``[..., '--mime-type', '-b', filepath]``.

        The leading part comes from the base class ``get_cmd()``.
        """
        cmd = super().get_cmd()
        # --mime-type: print only the MIME type; -b: omit the file name.
        cmd.extend(['--mime-type', '-b', self.filepath])

        return cmd

    def is_tiff(self):
        """Return True if the guessed MIME type is ``image/tiff``."""
        return self.guess() == 'image/tiff'

    def is_pdf(self):
        """Return True if the guessed MIME type is ``application/pdf``."""
        return self.guess() == 'application/pdf'

    def is_image(self):
        """
        Returns true if MIME type is one of following:
            * image/png
            * image/jpeg
            * image/jpg (non-standard spelling, accepted for compatibility)
        """
        return self.guess() in self.IMAGE_MIME_TYPES

    def guess(self):
        """Run the command and return its standard output, stripped.

        Assumes the base class ``run()`` returns an object exposing the
        captured output as text in ``stdout``.
        """
        cmd = self.get_cmd()
        complete = self.run(cmd)

        return complete.stdout.strip()
logger = logging.getLogger(__name__)


def get_tiff_pagecount(filepath):
    """Return the number of pages stored in the TIFF file at ``filepath``.

    Runs ``/usr/bin/identify -format "%n\\n" <filepath>`` and returns the
    first integer found at the start of an output line (``%n`` is
    presumably ImageMagick's escape for the number of images in the
    file - see the ImageMagick format documentation).

    :param filepath: path of the TIFF document.
    :returns: page count as ``int``; ``0`` when no output line starts
        with a digit.
    :raises Exception: when the command exits with a non-zero code; the
        command, its output and the exit code are logged first.
    """
    identify_cmd = ["/usr/bin/identify", "-format", "%n\n", filepath]
    result = subprocess.run(
        identify_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )

    if result.returncode != 0:
        logger.error(
            "get_tiff_pagecount: cmd=%s args=%s stdout=%s stderr=%s code=%s",
            identify_cmd,
            result.args,
            result.stdout,
            result.stderr,
            result.returncode,
            stack_info=True
        )
        raise Exception("Error occured while getting document page count.")

    # The first line that begins with digits carries the count.
    leading_number = re.compile(r"(\d+)")
    for raw_line in result.stdout.decode('utf-8').split('\n'):
        found = leading_number.match(raw_line.strip())
        if found is not None:
            return int(found.group(1))

    return 0


def convert_tiff2pdf(doc_url):
    """Convert the TIFF at ``doc_url`` to a PDF written next to it.

    The destination is ``doc_url`` with its extension replaced by
    ``.pdf``; the conversion is delegated to the ``convert`` command
    through ``run``.

    :param doc_url: path of the source TIFF document.
    :returns: file name (no directory part) of the resulting PDF.
    """
    logger.debug(f"convert_tiff2pdf for {doc_url}")

    # File name without directory and without extension.
    name_root = os.path.splitext(os.path.basename(doc_url))[0]
    # Full source path without extension.
    path_root = os.path.splitext(doc_url)[0]
    new_doc_url = f"{path_root}.pdf"

    logger.debug(
        f"tiff2pdf source={doc_url} dest={new_doc_url}"
    )

    run(("convert", doc_url, new_doc_url))

    # returns new filename
    return f"{name_root}.pdf"
logger = logging.getLogger(__name__)


class Wrapper:
    """Small helper around ``subprocess.run`` for one external executable.

    Subclasses typically extend :meth:`get_cmd` with their own arguments
    and pass the resulting list to :meth:`run`.
    """

    def __init__(self, exec_name, check=True, dry_run=False):
        """
        :param exec_name: name or path of the executable; placed first in
            the list returned by :meth:`get_cmd`.
        :param check: stored on the instance as given. :meth:`run` does
            not consult it; a failing command is logged, not raised.
        :param dry_run: when true, :meth:`run` only logs the command and
            does not execute it.
        """
        self.exec_name = exec_name
        # No trailing comma here: ``check,`` would store a 1-tuple, and
        # ``(False,)`` is truthy.
        self.check = check
        # useful for debugging purposes
        self.dry_run = dry_run

    def get_cmd(self):
        """Return a new list holding ``exec_name``, or an empty list if
        ``exec_name`` is falsy."""
        cmd = []

        if self.exec_name:
            cmd.append(self.exec_name)

        return cmd

    def call_no_args(self):
        """Run the bare command from :meth:`get_cmd`; returns ``None``."""
        cmd = self.get_cmd()
        self.run(cmd)

    def run(self, cmd):
        """Execute ``cmd`` and return the ``subprocess.CompletedProcess``.

        stdout and stderr are captured and decoded as UTF-8 text. A
        non-zero exit code is logged at error level and the result is
        still returned, so callers inspect ``returncode`` themselves.

        In dry-run mode nothing is executed: the command is logged and a
        synthetic successful result with empty ``stdout``/``stderr`` is
        returned, so callers that read ``.stdout`` keep working.

        :param cmd: sequence of command line arguments.
        """
        # str() each part so path-like arguments do not break the log line.
        command_to_run = ' '.join(str(part) for part in cmd)

        if self.dry_run:
            logger.debug("Dry run: %s", command_to_run)
            return subprocess.CompletedProcess(
                args=cmd,
                returncode=0,
                stdout="",
                stderr=""
            )

        logger.debug("subprocess: %s", command_to_run)

        ret = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf-8"
        )
        if ret.returncode != 0:
            logger.error(
                "returncode=%s stdout=%s stderr=%s",
                ret.returncode,
                ret.stdout,
                ret.stderr
            )
        return ret