mirror of https://github.com/papermerge/mglib
Version bump. Bring last modules from pmworker. Make mglib TIFF-aware.
parent
55de275ad5
commit
4da04050a2
|
@ -1,11 +1,12 @@
|
||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
## [1.2.0] - 16 July 2020
|
## [1.2.1] - 16 July 2020
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
||||||
- shortcuts.extract_img - resizes/converts images jpg, png documents
|
- shortcuts.resize_img - resizes/converts jpg and png image documents
|
||||||
|
- change get_pagecount to work with tiff files as well
|
||||||
|
- bring in last modules from pmworker (mime and wrapper)
|
||||||
|
|
||||||
## [1.1.0] - 25 June 2020
|
## [1.1.0] - 25 June 2020
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
import logging
|
||||||
|
from pmworker import wrapper
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class Mime(wrapper.Wrapper):
    """Guess the MIME type of a file by invoking the ``file`` utility.

    The command built by :meth:`get_cmd` is
    ``file --mime-type -b <filepath>``; :meth:`guess` runs it and returns
    the stripped standard output.
    """

    def __init__(self, filepath):
        """
        :param filepath: path of the file whose MIME type is to be guessed;
            passed verbatim as the last command-line argument.
        """
        super().__init__(exec_name="file")
        self.filepath = filepath

    def get_cmd(self):
        """Return the full argument list for the ``file`` invocation."""
        cmd = super().get_cmd()

        # --mime-type: print only the MIME type; -b: brief mode
        # (do not prepend the file name to the output).
        cmd.extend(['--mime-type', '-b', self.filepath])

        return cmd

    def is_tiff(self):
        """Return True if the guessed MIME type is ``image/tiff``."""
        return self.guess() == 'image/tiff'

    def is_pdf(self):
        """Return True if the guessed MIME type is ``application/pdf``."""
        return self.guess() == 'application/pdf'

    def is_image(self):
        """
        Returns true if MIME type is one of following:
            * image/png
            * image/jpeg
            * image/jpg
        """
        # 'image/jpeg' is the registered media type for JPEG files.
        # 'image/jpg' is kept only so that callers relying on the previous
        # comparison keep working.
        return self.guess() in ('image/png', 'image/jpeg', 'image/jpg')

    def guess(self):
        """Run the command and return its stdout with surrounding
        whitespace removed.

        Each call spawns a new process; the result is not cached.
        """
        complete = self.run(self.get_cmd())

        return complete.stdout.strip()
|
|
@ -11,6 +11,43 @@ small operations (e.g. get pdf page count).
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def get_tiff_pagecount(filepath):
    """Return the number of pages in a TIFF file as an integer.

    Runs ``identify -format "%n\\n" <filepath>`` and returns the first
    integer found at the start of an output line.

    :param filepath: path to the TIFF file, passed verbatim to ``identify``.
    :returns: the page count, or 0 if no output line starts with digits.
    :raises Exception: if ``identify`` exits with a non-zero return code.
        ``subprocess.run`` itself raises ``FileNotFoundError`` when the
        executable cannot be found at all.
    """
    # Imported locally so this function does not depend on the module's
    # top-level import list.
    import shutil

    # Prefer whichever ``identify`` is reachable through PATH; fall back to
    # the previously hard-coded location so existing setups keep working.
    identify = shutil.which("identify") or "/usr/bin/identify"

    cmd = [
        identify,
        "-format",
        "%n\n",
        filepath
    ]
    compl = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )

    if compl.returncode:

        logger.error(
            "get_tiff_pagecount: cmd=%s args=%s stdout=%s stderr=%s code=%s",
            cmd,
            compl.args,
            compl.stdout,
            compl.stderr,
            compl.returncode,
            stack_info=True
        )

        raise Exception("Error occured while getting document page count.")

    # With the "%n\n" format each output line is expected to hold just the
    # number of images (pages) in the file, so the first line that starts
    # with digits is the answer.
    for line in compl.stdout.decode('utf-8').split('\n'):
        found = re.match(r"(\d+)", line.strip())
        if found:
            return int(found.group(1))

    return 0
|
||||||
|
|
||||||
|
|
||||||
def get_pagecount(filepath):
|
def get_pagecount(filepath):
|
||||||
"""
|
"""
|
||||||
Returns the number of pages in a PDF document as integer.
|
Returns the number of pages in a PDF document as integer.
|
||||||
|
@ -31,9 +68,12 @@ def get_pagecount(filepath):
|
||||||
# considered by default one page document.
|
# considered by default one page document.
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
if ext and ext.lower() not in ('.pdf',):
|
if ext and ext.lower() in ('.tiff', ):
|
||||||
|
return get_tiff_pagecount(filepath)
|
||||||
|
|
||||||
|
if ext and ext.lower() not in ('.pdf', '.tiff'):
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Only jpeg, png and pdf are handlerd by this"
|
"Only jpeg, png, pdf and tiff are handlerd by this"
|
||||||
" method"
|
" method"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from mglib.runcmd import run
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def convert_tiff2pdf(doc_url):
    """Convert the document at ``doc_url`` to a PDF placed next to it.

    The destination path is ``doc_url`` with its extension replaced by
    ``.pdf``. The conversion itself is delegated to the external
    ``convert`` command (presumably ImageMagick's - confirm against the
    deployment environment), invoked through ``run``.

    :param doc_url: path of the source (TIFF) file.
    :returns: the new file name only (no directory part), ending in ``.pdf``.
    """
    logger.debug(f"convert_tiff2pdf for {doc_url}")

    # Destination: same location and stem as the source, ".pdf" extension.
    source_root = os.path.splitext(doc_url)[0]
    new_doc_url = f"{source_root}.pdf"

    logger.debug(
        f"tiff2pdf source={doc_url} dest={new_doc_url}"
    )

    run(("convert", doc_url, new_doc_url))

    # Callers receive just the file name of the produced PDF, not its path.
    pdf_stem = os.path.splitext(os.path.basename(doc_url))[0]
    return f"{pdf_stem}.pdf"
|
|
@ -0,0 +1,49 @@
|
||||||
|
import logging
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class Wrapper:
    """Small base class for running an external executable.

    Subclasses typically override :meth:`get_cmd` to append arguments to the
    list returned by the base implementation and then pass the result to
    :meth:`run`.
    """

    def __init__(self, exec_name, check=True, dry_run=False):
        """
        :param exec_name: name or path of the executable; becomes the first
            element of the command list when non-empty.
        :param check: stored on the instance unchanged. It is not consulted
            by :meth:`run`.
        :param dry_run: when true, :meth:`run` only logs the command and
            does not execute it.
        """
        self.exec_name = exec_name
        # Plain assignment on purpose: with a trailing comma this would
        # store the tuple ``(check,)``, which is truthy even for False.
        self.check = check
        # useful for debugging purposes
        self.dry_run = dry_run

    def get_cmd(self):
        """Return a new list holding the executable name (if any)."""
        cmd = []

        if self.exec_name:
            cmd.append(self.exec_name)

        return cmd

    def call_no_args(self):
        """Run the executable with no extra arguments; the result is
        discarded."""
        cmd = self.get_cmd()
        self.run(cmd)

    def run(self, cmd):
        """Execute ``cmd`` and return the ``subprocess.CompletedProcess``.

        stdout and stderr are captured and decoded as UTF-8. A non-zero
        return code is logged as an error but does not raise.

        In dry-run mode the command is only logged; a synthetic successful
        ``CompletedProcess`` with empty ``stdout``/``stderr`` is returned so
        callers that read those attributes keep working.

        :param cmd: sequence of strings forming the command line.
        """
        command_to_run = ' '.join(cmd)

        if self.dry_run:
            logger.debug(f"Dry run: {command_to_run}")
            # Do not spawn anything in dry-run mode.
            return subprocess.CompletedProcess(
                args=cmd,
                returncode=0,
                stdout="",
                stderr=""
            )

        logger.debug(f"subprocess: {command_to_run}")

        ret = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf-8"
        )
        if ret.returncode != 0:
            logger.error((
                f"returncode={ret.returncode}"
                f" stdout={ret.stdout}"
                f" stderr={ret.stderr}"
            ))

        return ret
|
2
setup.py
2
setup.py
|
@ -6,7 +6,7 @@ with open("README.md", "r") as fh:
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="mglib",
|
name="mglib",
|
||||||
version="1.2.0",
|
version="1.2.1",
|
||||||
author="Eugen Ciur",
|
author="Eugen Ciur",
|
||||||
author_email="eugen@papermerge.com",
|
author_email="eugen@papermerge.com",
|
||||||
url="https://github.com/papermerge/mglib",
|
url="https://github.com/papermerge/mglib",
|
||||||
|
|
Loading…
Reference in New Issue