diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 8443664..610f9a2 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -24,7 +24,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pycodestyle pytest coverage - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + if [ -f requirements/base.txt ]; then pip install -r requirements/base.txt; fi sudo apt install poppler-utils pdftk - name: Lint with pycodestyle run: | diff --git a/mglib/pdfinfo.py b/mglib/pdfinfo.py index 123f231..ef82c6b 100644 --- a/mglib/pdfinfo.py +++ b/mglib/pdfinfo.py @@ -2,6 +2,7 @@ import os import re import subprocess import logging +from magic import from_file from .conf import settings from .exceptions import FileTypeNotSupported @@ -63,18 +64,18 @@ def get_pagecount(filepath): if os.path.isdir(filepath): raise ValueError("Filepath %s is a directory!" % filepath) - base, ext = os.path.splitext(filepath) + mime_type = from_file(filepath, mime=True) # pure images (png, jpeg) have only one page :) - if ext and ext.lower() in ('.jpeg', '.png', '.jpg'): + if mime_type in ['image/png', 'image/jpeg', 'image/jpg']: # whatever png/jpg image is there - it is # considered by default one page document. return 1 - if ext and ext.lower() in ('.tiff', ): + if mime_type == 'image/tiff': return get_tiff_pagecount(filepath) - if ext and ext.lower() not in ('.pdf', '.tiff'): + if mime_type != 'application/pdf': raise FileTypeNotSupported( "Only jpeg, png, pdf and tiff are handled by this" " method" diff --git a/requirements/base.txt b/requirements/base.txt new file mode 100644 index 0000000..aee0e39 --- /dev/null +++ b/requirements/base.txt @@ -0,0 +1 @@ +python-magic \ No newline at end of file diff --git a/test/data/berlin.jpeg b/test/data/berlin.jpeg index c305027..2605b81 100644 --- a/test/data/berlin.jpeg +++ b/test/data/berlin.jpeg @@ -1,2 +1,2 @@ -I am not even binary! +ÿØÿØI am not even binary! The idea is to test pdfinfo.get_pagecount \ No newline at end of file diff --git a/test/data/berlin.jpg b/test/data/berlin.jpg index 6c06761..8e5a3dd 100644 --- a/test/data/berlin.jpg +++ b/test/data/berlin.jpg @@ -1 +1 @@ -well... I am text! But who cares? The idea is to test pdfinfo.get_pagecount \ No newline at end of file +ÿØÿîwell... I am text! But who cares? The idea is to test pdfinfo.get_pagecount \ No newline at end of file diff --git a/test/data/berlin.png b/test/data/berlin.png index 6c06761..df88485 100644 Binary files a/test/data/berlin.png and b/test/data/berlin.png differ