From 6808b3f0a3a5d14086cc714b4e6dfc1b6ce9ed17 Mon Sep 17 00:00:00 2001
From: Eugen Ciur
Date: Mon, 4 May 2020 14:39:23 +0200
Subject: [PATCH] refactoring endpoint, storage, doc/pagepath

---
 mglib/storage.py | 39 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/mglib/storage.py b/mglib/storage.py
index 5e64cb4..3add56d 100644
--- a/mglib/storage.py
+++ b/mglib/storage.py
@@ -1,12 +1,22 @@
 import os
+import shutil
+
+
+def safe_to_delete(place, safe_extensions):
+    raise Exception(
+        "Trying to recursively delete unsafe path"
+    )
 
 
 class Storage:
     """
-    Storage class which works with DocumentPath and PagePath
+    Default Storage class which works with DocumentPath and PagePath
+    on local host filesystem
     """
 
     def __init__(self, location=None):
+        # by default, this will be something like
+        # settings.MEDIA_ROOT
         self._location = location
 
     @property
@@ -22,12 +32,31 @@ class Storage:
         """
         Receives a mglib.path.DocumentPath instance
         """
-        pass
+        # where original documents and their versions are stored
+        abs_dirname_docs = self.path(
+            doc_path.dirname_docs
+        )
+        # where OCRed information and generated thumbnails
+        # are stored
+        abs_dirname_results = self.path(
+            doc_path.abs_dirname_results
+        )
+        # Before recursively deleting everything in folder
+        # double check that there are only
+        # .pdf, .txt, .hocr, .jpg files.
+        if safe_to_delete(
+            abs_dirname_docs,
+            safe_extensions=['pdf']
+        ):
+            shutil.rmtree(abs_dirname_docs)
+
+        if safe_to_delete(
+            abs_dirname_results,
+            safe_extensions=['txt', 'jpg', 'hocr']
+        ):
+            shutil.rmtree(abs_dirname_results)
 
     def exists(self, _path):
         return os.path.exists(
             self.path(_path)
         )
-
-
-