Version bump. Bring in the last modules from pmworker. Make mglib TIFF-aware.

pull/3/head
Eugen Ciur 2020-07-16 11:06:41 +02:00
parent 55de275ad5
commit 4da04050a2
6 changed files with 168 additions and 6 deletions

View File

@ -1,11 +1,12 @@
# Changelog
## [1.2.0] - 16 July 2020
## [1.2.1] - 16 July 2020
### Added
- shortcuts.extract_img - resizes/converts images jpg, png documents
- shortcuts.resize_img - resizes/converts images jpg, png documents
- change get_pagecount to work with tiff files as well
- bring in last modules from pmworker (mime and wrapper)
## [1.1.0] - 25 June 2020

40
mglib/mime.py Normal file
View File

@ -0,0 +1,40 @@
import logging
from pmworker import wrapper
logger = logging.getLogger(__name__)
class Mime(wrapper.Wrapper):
    """Guess the MIME type of a file by running the ``file`` command.

    The command built by :meth:`get_cmd` is
    ``file --mime-type -b <filepath>``; :meth:`guess` runs it and returns
    the stripped standard output.
    """

    def __init__(self, filepath):
        """Configure the wrapper for the ``file`` executable.

        :param filepath: path of the file whose MIME type will be guessed.
        """
        super().__init__(exec_name="file")
        self.filepath = filepath

    def get_cmd(self):
        """Return the base command extended with the MIME options and path."""
        cmd = super().get_cmd()
        cmd.extend(['--mime-type', '-b', self.filepath])
        return cmd

    def is_tiff(self):
        """Return True if the guessed MIME type is ``image/tiff``."""
        return self.guess() == 'image/tiff'

    def is_pdf(self):
        """Return True if the guessed MIME type is ``application/pdf``."""
        return self.guess() == 'application/pdf'

    def is_image(self):
        """
        Returns true if MIME type is one of following:
        * image/png
        * image/jpeg

        The non-standard spelling ``image/jpg`` is accepted as well so that
        existing behaviour is preserved.
        """
        # The registered media type for JPEG is "image/jpeg"; comparing only
        # against "image/jpg" would never match JPEG files.
        return self.guess() in ('image/png', 'image/jpeg', 'image/jpg')

    def guess(self):
        """Run the command and return its stdout with whitespace stripped.

        Every call runs the command again; the result is not cached.
        """
        cmd = self.get_cmd()
        complete = self.run(cmd)
        return complete.stdout.strip()

View File

@ -11,6 +11,43 @@ small operations (e.g. get pdf page count).
logger = logging.getLogger(__name__)
def get_tiff_pagecount(filepath):
    """Return the number of pages of the TIFF file at ``filepath``.

    Runs ``/usr/bin/identify`` with the ``%n`` format escape (documented by
    ImageMagick as the number of images in the sequence) and returns the
    first integer found at the start of an output line. Returns 0 when no
    output line starts with digits.

    Raises ``Exception`` when the command exits with a non-zero status;
    the command, its output and return code are logged beforehand.
    """
    identify_cmd = ["/usr/bin/identify", "-format", "%n\n", filepath]
    result = subprocess.run(
        identify_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    if result.returncode != 0:
        logger.error(
            "get_tiff_pagecount: cmd=%s args=%s stdout=%s stderr=%s code=%s",
            identify_cmd,
            result.args,
            result.stdout,
            result.stderr,
            result.returncode,
            stack_info=True
        )
        raise Exception("Error occured while getting document page count.")

    output = result.stdout.decode('utf-8')
    # The first line that begins with digits carries the page count.
    leading_numbers = (
        re.match(r"(\d+)", raw_line.strip())
        for raw_line in output.split('\n')
    )
    for found in leading_numbers:
        if found:
            return int(found.group(1))

    return 0
def get_pagecount(filepath):
"""
Returns the number of pages in a PDF document as integer.
@ -31,9 +68,12 @@ def get_pagecount(filepath):
# considered by default one page document.
return 1
if ext and ext.lower() not in ('.pdf',):
if ext and ext.lower() in ('.tiff', ):
return get_tiff_pagecount(filepath)
if ext and ext.lower() not in ('.pdf', '.tiff'):
raise ValueError(
"Only jpeg, png and pdf are handlerd by this"
"Only jpeg, png, pdf and tiff are handlerd by this"
" method"
)

32
mglib/tiff.py Normal file
View File

@ -0,0 +1,32 @@
import os
import logging
from mglib.runcmd import run
logger = logging.getLogger(__name__)
def convert_tiff2pdf(doc_url):
    """Convert the TIFF at ``doc_url`` to a PDF stored next to it.

    The destination path is ``doc_url`` with its extension replaced by
    ``.pdf``; the conversion is delegated to the ``convert`` command via
    ``run``.

    Returns only the file name of the new PDF (no directory part).
    """
    logger.debug(f"convert_tiff2pdf for {doc_url}")

    # File name without directory and without extension.
    base_root = os.path.splitext(os.path.basename(doc_url))[0]
    # Full source path without extension.
    root = os.path.splitext(doc_url)[0]
    new_doc_url = f"{root}.pdf"

    logger.debug(
        f"tiff2pdf source={doc_url} dest={new_doc_url}"
    )

    run(("convert", doc_url, new_doc_url))

    # returns new filename
    return f"{base_root}.pdf"

49
mglib/wrapper.py Normal file
View File

@ -0,0 +1,49 @@
import logging
import subprocess
logger = logging.getLogger(__name__)
class Wrapper:
    """Small helper for building and running an external command.

    Subclasses typically override :meth:`get_cmd` to append their own
    arguments and then pass the result to :meth:`run`.
    """

    def __init__(self, exec_name, check=True, dry_run=False):
        """Store the executable name and the run options.

        :param exec_name: name or path of the executable to run.
        :param check: stored on the instance as given; ``run`` does not
            consult it.
        :param dry_run: when true, ``run`` only logs the command and does
            not execute it.
        """
        self.exec_name = exec_name
        # Plain assignment: a trailing comma here would store a one-element
        # tuple, which is always truthy regardless of the argument.
        self.check = check
        # useful for debugging purposes
        self.dry_run = dry_run

    def get_cmd(self):
        """Return a new argument list holding the executable name, if any."""
        cmd = []
        if self.exec_name:
            cmd.append(self.exec_name)
        return cmd

    def call_no_args(self):
        """Run the command returned by ``get_cmd`` without extra arguments."""
        cmd = self.get_cmd()
        self.run(cmd)

    def run(self, cmd):
        """Execute ``cmd`` and return the ``subprocess.CompletedProcess``.

        stdout and stderr are captured and decoded as UTF-8. A non-zero
        return code is logged as an error but does not raise.

        In dry-run mode nothing is executed: the command is logged and a
        synthetic successful result with empty ``stdout``/``stderr`` is
        returned, so callers that read ``.stdout`` keep working.
        """
        command_to_run = ' '.join(cmd)

        if self.dry_run:
            logger.debug(f"Dry run: {command_to_run}")
            return subprocess.CompletedProcess(
                args=cmd,
                returncode=0,
                stdout="",
                stderr=""
            )

        logger.debug(f"subprocess: {command_to_run}")

        ret = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf-8"
        )
        if ret.returncode != 0:
            logger.error((
                f"returncode={ret.returncode}"
                f" stdout={ret.stdout}"
                f" stderr={ret.stderr}"
            ))

        return ret

View File

@ -6,7 +6,7 @@ with open("README.md", "r") as fh:
setup(
name="mglib",
version="1.2.0",
version="1.2.1",
author="Eugen Ciur",
author_email="eugen@papermerge.com",
url="https://github.com/papermerge/mglib",