mirror of https://github.com/papermerge/mglib
version bump. Bring last modules from pmworker. Make mglib - tiff aware
parent
55de275ad5
commit
4da04050a2
|
@ -1,11 +1,12 @@
|
|||
# Changelog
|
||||
|
||||
## [1.2.0] - 16 July 2020
|
||||
## [1.2.1] - 16 July 2020
|
||||
|
||||
### Added
|
||||
|
||||
- shortcuts.extract_img - resizes/converts images jpg, png documents
|
||||
|
||||
- shortcuts.resize_img - resizes/converts images jpg, png documents
|
||||
- change get_pagecount to work with tiff files as well
|
||||
- bring in last modules from pmworker (mime and wrapper)
|
||||
|
||||
## [1.1.0] - 25 June 2020
|
||||
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
import logging
|
||||
from pmworker import wrapper
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Mime(wrapper.Wrapper):
    """
    Guesses the MIME type of a file by running the ``file`` command.

    The command line is built by ``get_cmd`` and executed through the
    ``run`` method inherited from ``wrapper.Wrapper``.
    """

    def __init__(self, filepath):
        """
        filepath - path of the file whose MIME type will be guessed.
        """
        super().__init__(exec_name="file")
        self.filepath = filepath

    def get_cmd(self):
        """
        Returns the argument list: file --mime-type -b <filepath>
        """
        cmd = super().get_cmd()

        cmd.extend(['--mime-type'])
        cmd.extend(['-b'])
        cmd.extend([self.filepath])

        return cmd

    def is_tiff(self):
        """
        Returns true if guessed MIME type is image/tiff
        """
        return self.guess() == 'image/tiff'

    def is_pdf(self):
        """
        Returns true if guessed MIME type is application/pdf
        """
        return self.guess() == 'application/pdf'

    def is_image(self):
        """
        Returns true if MIME type is one of following:
            * image/png
            * image/jpeg
            * image/jpg
        """
        # 'image/jpeg' is the registered media type for JPEG files;
        # the non-standard 'image/jpg' spelling is still accepted so that
        # anything which matched before keeps matching.
        return self.guess() in ('image/png', 'image/jpeg', 'image/jpg')

    def guess(self):
        """
        Runs the command and returns its stdout with surrounding
        whitespace stripped. Every call runs the command again,
        the result is not cached.
        """
        cmd = self.get_cmd()
        complete = self.run(cmd)

        return complete.stdout.strip()
|
|
@ -11,6 +11,43 @@ small operations (e.g. get pdf page count).
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_tiff_pagecount(filepath):
    """
    Returns the page count reported by /usr/bin/identify for given file.

    identify is invoked with -format "%n\\n" (per ImageMagick docs,
    %n is the number of images in the sequence). The first output line
    that starts with digits provides the returned integer; if no such
    line is found 0 is returned.

    Raises Exception if identify exits with a non-zero return code.
    """
    identify_cmd = ["/usr/bin/identify", "-format", "%n\n", filepath]
    result = subprocess.run(
        identify_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )

    if result.returncode != 0:
        logger.error(
            "get_tiff_pagecount: cmd=%s args=%s stdout=%s stderr=%s code=%s",
            identify_cmd,
            result.args,
            result.stdout,
            result.stderr,
            result.returncode,
            stack_info=True
        )
        raise Exception("Error occured while getting document page count.")

    # the count may be printed on more than one line; the first
    # line which starts with a number wins.
    for raw_line in result.stdout.decode('utf-8').split('\n'):
        found = re.match(r"(\d+)", raw_line.strip())
        if found is not None:
            return int(found.group(1))

    return 0
|
||||
|
||||
|
||||
def get_pagecount(filepath):
|
||||
"""
|
||||
Returns the number of pages in a PDF document as integer.
|
||||
|
@ -31,9 +68,12 @@ def get_pagecount(filepath):
|
|||
# considered by default one page document.
|
||||
return 1
|
||||
|
||||
if ext and ext.lower() not in ('.pdf',):
|
||||
if ext and ext.lower() in ('.tiff', ):
|
||||
return get_tiff_pagecount(filepath)
|
||||
|
||||
if ext and ext.lower() not in ('.pdf', '.tiff'):
|
||||
raise ValueError(
|
||||
"Only jpeg, png and pdf are handlerd by this"
|
||||
"Only jpeg, png, pdf and tiff are handlerd by this"
|
||||
" method"
|
||||
)
|
||||
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
import os
|
||||
import logging
|
||||
|
||||
from mglib.runcmd import run
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def convert_tiff2pdf(doc_url):
    """
    Converts the file at doc_url to PDF by running the "convert" command.

    The PDF is written next to the source: same path with the extension
    replaced by ".pdf".

    Returns only the new file name (basename with ".pdf" extension,
    no directory part).
    """
    logger.debug(f"convert_tiff2pdf for {doc_url}")

    # destination = source path with its extension swapped for .pdf
    path_without_ext = os.path.splitext(doc_url)[0]
    pdf_path = f"{path_without_ext}.pdf"

    logger.debug(
        f"tiff2pdf source={doc_url} dest={pdf_path}"
    )

    run(("convert", doc_url, pdf_path))

    # callers receive the file name only (no path)
    stem = os.path.splitext(os.path.basename(doc_url))[0]

    return f"{stem}.pdf"
|
|
@ -0,0 +1,49 @@
|
|||
import logging
|
||||
import subprocess
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Wrapper:
    """
    Thin wrapper around an external executable.

    Subclasses extend ``get_cmd`` to append their own arguments and
    use ``run`` to execute the resulting command line.
    """

    def __init__(self, exec_name, check=True, dry_run=False):
        """
        exec_name - name (or path) of the executable to run.
        check - stored on the instance; ``run`` in this class does
            not consult it.
        dry_run - when true, ``run`` only logs the command and does
            not execute it.
        """
        self.exec_name = exec_name
        # Keep the value exactly as passed. A trailing comma here would
        # store a one-element tuple, which is truthy even for check=False.
        self.check = check
        # useful for debugging purposes
        self.dry_run = dry_run

    def get_cmd(self):
        """
        Returns the command as a list of arguments. Here it contains
        only the executable name (empty list if exec_name is empty).
        """
        cmd = []

        if self.exec_name:
            cmd.append(self.exec_name)

        return cmd

    def call_no_args(self):
        """
        Runs the command built by ``get_cmd``; the result is discarded.
        """
        cmd = self.get_cmd()
        self.run(cmd)

    def run(self, cmd):
        """
        Executes cmd (a sequence of strings) and returns the
        subprocess.CompletedProcess with stdout/stderr captured as
        utf-8 text.

        A non-zero return code is logged as an error; no exception is
        raised for it.

        In dry run mode the command is logged but not executed, and a
        placeholder CompletedProcess (returncode 0, empty stdout and
        stderr) is returned so that callers reading ``.stdout`` or
        ``.returncode`` keep working.
        """
        command_to_run = ' '.join(cmd)

        if self.dry_run:
            logger.debug(f"Dry run: {command_to_run}")
            return subprocess.CompletedProcess(
                args=cmd,
                returncode=0,
                stdout="",
                stderr=""
            )

        logger.debug(f"subprocess: {command_to_run}")

        ret = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf-8"
        )
        if ret.returncode != 0:
            logger.error((
                f"returncode={ret.returncode}"
                f" stdout={ret.stdout}"
                f" stderr={ret.stderr}"
            ))

        return ret
|
Loading…
Reference in New Issue