add extra checks for mime type, inc version, fix failing tests

master
Eugen Ciur 2020-12-01 11:40:51 +01:00
parent fe20ddd72b
commit 06be42542a
3 changed files with 32 additions and 6 deletions

View File

@ -1,6 +1,12 @@
# Changelog # Changelog
## [1.3.2] - 1 December 2020
### Changed
- mglib.pdfinfo.get_pagecount uses python-magic + file extension to determine the correct mime type (and thus page count)
## [1.3.1] - 1 December 2020 ## [1.3.1] - 1 December 2020
### Changed ### Changed

View File

@ -64,22 +64,42 @@ def get_pagecount(filepath):
if os.path.isdir(filepath): if os.path.isdir(filepath):
raise ValueError("Filepath %s is a directory!" % filepath) raise ValueError("Filepath %s is a directory!" % filepath)
base, ext = os.path.splitext(filepath)
mime_type = from_file(filepath, mime=True) mime_type = from_file(filepath, mime=True)
# pure images (png, jpeg) have only one page :) # pure images (png, jpeg) have only one page :)
if mime_type in ['image/png', 'image/jpeg', 'image/jpg']: if mime_type in ['image/png', 'image/jpeg', 'image/jpg']:
# whatever png/jpg image is there - it is # whatever png/jpg image is there - it is
# considered by default one page document. # considered by default one page document.
return 1 return 1
# In case of REST API upload (via PUT + form multipart)
# django saves temporary file as application/octet-stream
# Checking the file extension is an additional way of determining
# the correct mime type
if ext and ext.lower() in ('.jpeg', '.png', '.jpg'):
return 1
if mime_type == 'image/tiff': if mime_type == 'image/tiff':
return get_tiff_pagecount(filepath) return get_tiff_pagecount(filepath)
# In case of REST API upload (via PUT + form multipart)
# django saves temporary file as application/octet-stream
# Checking the file extension is an additional way of determining
# the correct mime type
if ext and ext.lower() in ('.tiff', ):
return get_tiff_pagecount(filepath)
if mime_type != 'application/pdf': if mime_type != 'application/pdf':
raise FileTypeNotSupported( # In case of REST API upload (via PUT + form multipart)
"Only jpeg, png, pdf and tiff are handled by this" # django saves temporary file as application/octet-stream
" method" # Checking extentions is an extra method of finding out correct
) # mime type
if ext and ext.lower() != '.pdf':
raise FileTypeNotSupported(
"Only jpeg, png, pdf and tiff are handled by this"
" method"
)
# pdfinfo "${PDFFILE}" | grep Pages # pdfinfo "${PDFFILE}" | grep Pages
cmd = [ cmd = [
settings.BINARY_PDFINFO, settings.BINARY_PDFINFO,

View File

@ -6,7 +6,7 @@ with open("README.md", "r") as fh:
setup( setup(
name="mglib", name="mglib",
version="1.3.1", version="1.3.2",
author="Eugen Ciur", author="Eugen Ciur",
author_email="eugen@papermerge.com", author_email="eugen@papermerge.com",
url="https://github.com/papermerge/mglib", url="https://github.com/papermerge/mglib",