2020-05-04 13:16:44 +02:00
|
|
|
import os
|
2020-05-04 16:05:23 +02:00
|
|
|
import logging
|
2020-05-04 14:39:23 +02:00
|
|
|
import shutil
|
|
|
|
|
2020-05-04 16:05:23 +02:00
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
2020-05-04 14:39:23 +02:00
|
|
|
|
|
|
|
def safe_to_delete(place, safe_extensions):
    """
    Tell whether the folder ``place`` may be removed recursively.

    The folder is considered safe only when it is an existing directory
    and every file found below it (at any depth) has an extension listed
    in ``safe_extensions``.

    Args:
        place: path of the folder about to be deleted.
        safe_extensions: iterable of allowed file extensions, with or
            without the leading dot (``'pdf'`` and ``'.pdf'`` are
            treated the same); matching is case-insensitive.

    Returns:
        True if ``place`` is an existing directory containing only files
        with allowed extensions (an empty directory qualifies),
        False otherwise.
    """
    log = logging.getLogger(__name__)

    # A plain file passes os.path.exists() but cannot be handed to
    # shutil.rmtree(), so insist on a directory here.
    if not os.path.isdir(place):
        log.warning("Trying to delete non-existing folder %s", place)
        return False

    # Normalise once so that 'pdf', '.pdf' and 'PDF' all compare equal.
    allowed = {ext.lower().lstrip('.') for ext in safe_extensions}

    for root, _dirs, files in os.walk(place):
        for name in files:
            extension = os.path.splitext(name)[1].lower().lstrip('.')
            if extension not in allowed:
                log.warning(
                    "Refusing to delete %s: unexpected file %s",
                    place,
                    os.path.join(root, name),
                )
                return False

    return True
|
2020-05-04 13:16:44 +02:00
|
|
|
|
2020-05-04 12:35:08 +02:00
|
|
|
|
|
|
|
class Storage:
    """
    Local-filesystem storage backend.

    Resolves relative paths (such as those carried by DocumentPath and
    PagePath objects) against a root folder on the local host.
    """

    def __init__(self, location=None):
        # Root folder of the storage; callers typically pass something
        # like settings.MEDIA_ROOT.
        self._location = location

    @property
    def location(self):
        """Root folder this storage was created with."""
        return self._location

    def path(self, _path):
        """Return ``_path`` joined onto the storage root."""
        base = self.location
        return os.path.join(base, _path)

    def delete_document(self, doc_path):
        """
        Remove the folders belonging to a document.

        ``doc_path`` is a mglib.path.DocumentPath instance.
        """
        # Resolve both folders up front, before anything is removed:
        #  * dirname_docs    - original documents and their versions
        #  * dirname_results - OCRed information and generated thumbnails
        # Each folder is paired with the only file extensions that are
        # expected inside it; a folder is wiped recursively only when
        # safe_to_delete() approves it.
        candidates = (
            (self.path(doc_path.dirname_docs), ['pdf']),
            (self.path(doc_path.dirname_results), ['txt', 'jpg', 'hocr']),
        )

        for folder, extensions in candidates:
            if safe_to_delete(folder, safe_extensions=extensions):
                shutil.rmtree(folder)

    def exists(self, _path):
        """Return True when ``_path`` exists below the storage root."""
        absolute = self.path(_path)
        return os.path.exists(absolute)
|