diff --git a/mglib/storage.py b/mglib/storage.py
index 5e64cb4..3add56d 100644
--- a/mglib/storage.py
+++ b/mglib/storage.py
@@ -1,12 +1,44 @@
 import os
+import shutil
+
+
+def safe_to_delete(place, safe_extensions):
+    """
+    Tells whether directory ``place`` may be deleted recursively.
+
+    ``safe_extensions`` is a list of file extensions without leading
+    dot, e.g. ['txt', 'jpg']; they are compared case insensitively.
+
+    Returns False if ``place`` is not an existing directory (nothing
+    to delete) and True if every file found under it has one of the
+    safe extensions. Raises Exception on the first file whose
+    extension is not listed.
+    """
+    if not os.path.isdir(place):
+        return False
+
+    allowed = {ext.lower().lstrip('.') for ext in safe_extensions}
+
+    for _root, _dirs, filenames in os.walk(place):
+        for filename in filenames:
+            ext = os.path.splitext(filename)[1].lower().lstrip('.')
+            if ext not in allowed:
+                raise Exception(
+                    "Trying to recursively delete unsafe path"
+                )
+
+    return True
 
 
 class Storage:
     """
-    Storage class which works with DocumentPath and PagePath
+    Default Storage class which works with DocumentPath and PagePath
+    on local host filesystem
     """
 
     def __init__(self, location=None):
+        # by default, this will be something like
+        # settings.MEDIA_ROOT
         self._location = location
 
     @property
@@ -22,12 +54,33 @@ class Storage:
         """
         Receives a mglib.path.DocumentPath instance
         """
-        pass
+        # where original documents and their versions are stored
+        abs_dirname_docs = self.path(
+            doc_path.dirname_docs
+        )
+        # where OCRed information and generated thumbnails
+        # are stored
+        # NOTE(review): unlike dirname_docs above, this attribute is
+        # named abs_*; confirm self.path() is meant to receive it.
+        abs_dirname_results = self.path(
+            doc_path.abs_dirname_results
+        )
+        # Before recursively deleting everything in folder
+        # double check that there are only
+        # .pdf, .txt, .hocr, .jpg files.
+        if safe_to_delete(
+            abs_dirname_docs,
+            safe_extensions=['pdf']
+        ):
+            shutil.rmtree(abs_dirname_docs)
+
+        if safe_to_delete(
+            abs_dirname_results,
+            safe_extensions=['txt', 'jpg', 'hocr']
+        ):
+            shutil.rmtree(abs_dirname_results)
 
     def exists(self, _path):
         return os.path.exists(
             self.path(_path)
         )
-
-
-