mirror of https://github.com/papermerge/mglib
moved endpoint and steps into mglib
parent
32f58fe8ed
commit
024f47dc20
|
@ -0,0 +1,45 @@
|
|||
class Step:
    """A single zoom level for rendering a document page.

    ``current`` is an index into ``Step.LIST``; the value stored at that
    index is the zoom percentage, from which the pixel width is derived.
    """

    # Width (in pixels) of a document when displayed at 100%.
    WIDTH_100p = 1240
    PERCENT = 100
    # Zoom percentages, addressed by the ``current`` index.
    LIST = [125, 100, 75, 50, 10]

    # The aspect ratio of A4 paper is h = w * 1.41, so for example:
    #   100 => w = 1240, h = 1748
    #    50 => w = 620,  h = 874

    def __init__(self, current=1):
        self.current = current

    @property
    def percent(self):
        """Zoom percentage found at index ``self.current`` of ``Step.LIST``."""
        return Step.LIST[self.current]

    @property
    def width(self):
        """Width for this step: ``WIDTH_100p`` scaled by the percentage.

        The result is truncated to an ``int``.
        """
        return int(self.percent / 100 * Step.WIDTH_100p)

    @property
    def is_thumbnail(self):
        """True when the zoom percentage is strictly below 50."""
        return self.percent < 50

    @property
    def is_for_hocr(self):
        """True for every step that is not a thumbnail."""
        return not self.is_thumbnail

    def __str__(self):
        return f"Step(percent={self.percent}, width={self.width})"

    def __repr__(self):
        # Same text as __str__.
        return str(self)
|
||||
|
||||
|
||||
class Steps:
    """Iterable holding one ``Step`` for every entry of ``Step.LIST``."""

    def __init__(self):
        # Derive the indices from Step.LIST instead of spelling them out,
        # so this list cannot drift out of sync with the declared
        # percentages.
        self.steps = [Step(index) for index in range(len(Step.LIST))]

    def __iter__(self):
        """Iterate over the contained ``Step`` objects in index order."""
        return iter(self.steps)
|
|
@ -0,0 +1,10 @@
|
|||
|
||||
class Storage:
    """
    Storage class which works with Endpoints
    """

    def delete(self, ep):
        """Placeholder: ignores ``ep`` and returns ``None``."""
        return None
|
||||
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
import os
|
||||
import sys
|
||||
import argparse
|
||||
from unittest.loader import TestLoader
|
||||
from unittest.runner import TextTestRunner
|
||||
|
||||
# Directory that contains this script; test discovery starts from here.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

test_loader = TestLoader()
test_runner = TextTestRunner()

# NOTE(review): the parser is instantiated, but no arguments are declared
# and parse_args() is never called in the visible code.
parser = argparse.ArgumentParser()

tests = test_loader.discover(start_dir=BASE_DIR)
result = test_runner.run(tests)

# Report failure to the calling process through a non-zero exit status.
if not result.wasSuccessful():
    sys.exit(1)
|
|
@ -0,0 +1,303 @@
|
|||
import unittest
|
||||
import os
|
||||
|
||||
from mglib.endpoint import (
|
||||
Endpoint, DocumentEp, PageEp,
|
||||
get_bucketname, get_keyname
|
||||
)
|
||||
from mglib.step import Step
|
||||
|
||||
|
||||
class TestOthers(unittest.TestCase):
    """Checks for the ``get_bucketname`` and ``get_keyname`` helpers."""

    def test_getbucketname(self):
        """Each URL spelling below must yield the bucket ``my-bucket``."""
        urls = (
            "s3://my-bucket/some/path/to/x.pdf",
            "s3:/my-bucket/some/path/to/x.pdf",
            "s3:/my-bucket/",
            "s3:/my-bucket",
        )
        for url in urls:
            self.assertEqual(get_bucketname(url), "my-bucket")

    def test_getkeyname(self):
        """Single- and double-slash URLs must yield the same key."""
        urls = (
            "s3://my-bucket/some/path/to/x.pdf",
            "s3:/my-bucket/some/path/to/x.pdf",
        )
        for url in urls:
            self.assertEqual(get_keyname(url), "some/path/to/x.pdf")
|
||||
|
||||
|
||||
class TestEndpoint(unittest.TestCase):
    """Checks on a bare ``Endpoint`` built from a URL string."""

    def test_s3_bucketname(self):
        endpoint = Endpoint("s3:/constellation/")

        self.assertTrue(endpoint.is_s3)
        self.assertFalse(endpoint.is_local)
        self.assertEqual(endpoint.bucketname, "constellation")

    def test_s3_bucketname_no_slash(self):
        # Same lookup as above, but the URL has no trailing slash.
        endpoint = Endpoint("s3:/kakamaka")
        self.assertEqual(endpoint.bucketname, "kakamaka")

    def test_local(self):
        # The expected dirname carries a trailing slash that the input lacks.
        endpoint = Endpoint("local:/var/media/files")
        self.assertEqual(endpoint.dirname, "/var/media/files/")

    def test_repr(self):
        endpoint = Endpoint("s3:/bucket/")
        self.assertEqual(f"{endpoint}", "Endpoint(s3:/bucket/)")
|
||||
|
||||
|
||||
class TestDocumentEp(unittest.TestCase):
    """Checks on the keys, URLs and directory names of ``DocumentEp``."""

    def setUp(self):
        self.remote_ep = Endpoint("s3:/silver-bucket/")
        self.local_ep = Endpoint("local:/var/media/")

    def _make_doc_ep(self, file_name="x.pdf", **extra):
        """Build a ``DocumentEp`` for user 1 / document 3 on the fixtures.

        ``extra`` is forwarded unchanged as additional keyword arguments
        (the tests below use it for ``aux_dir``).
        """
        return DocumentEp(
            remote_endpoint=self.remote_ep,
            local_endpoint=self.local_ep,
            user_id=1,
            document_id=3,
            file_name=file_name,
            **extra
        )

    def test_document_url_key(self):
        doc_ep = self._make_doc_ep(file_name="contract.pdf")

        self.assertEqual(doc_ep.bucketname, "silver-bucket")
        self.assertEqual(doc_ep.key, "docs/user_1/document_3/contract.pdf")

    def test_document_url(self):
        doc_ep = self._make_doc_ep()

        self.assertEqual(
            doc_ep.url(ep=Endpoint.S3),
            "s3:/silver-bucket/docs/user_1/document_3/x.pdf"
        )
        self.assertEqual(
            doc_ep.url(ep=Endpoint.LOCAL),
            "/var/media/docs/user_1/document_3/x.pdf"
        )

    def test_empty_tenant(self):
        """
        With no tenant specified - url to document will
        be without tenant.
        """
        doc_ep = self._make_doc_ep()

        self.assertEqual(
            doc_ep.url(),
            "/var/media/docs/user_1/document_3/x.pdf"
        )

    def test_inc_version(self):
        """
        Document endpoints are now versioned.
        Initial version is 0.
        When version is 0, the "old" endpoint path applies i.e.
        version is not included in the path.
        After document is modified (blank page deleted for example),
        its version is incremented. If document version is > 0, then
        version is included in the path.
        """
        doc_ep = self._make_doc_ep()

        # (local url, s3 url) expected after each successive increment.
        expected_after_each_bump = (
            (
                "/var/media/docs/user_1/document_3/v1/x.pdf",
                "s3:/silver-bucket/docs/user_1/document_3/v1/x.pdf",
            ),
            (
                "/var/media/docs/user_1/document_3/v2/x.pdf",
                "s3:/silver-bucket/docs/user_1/document_3/v2/x.pdf",
            ),
        )
        for local_url, s3_url in expected_after_each_bump:
            doc_ep.inc_version()
            self.assertEqual(doc_ep.url(), local_url)
            self.assertEqual(doc_ep.url(ep=Endpoint.S3), s3_url)

    def test_dirname(self):
        doc_ep = self._make_doc_ep(aux_dir="results")

        self.assertEqual(
            doc_ep.dirname,
            "/var/media/results/user_1/document_3/"
        )

    def test_pages_dirname(self):
        doc_ep = self._make_doc_ep(aux_dir="results")

        self.assertEqual(
            doc_ep.pages_dirname,
            "/var/media/results/user_1/document_3/pages/"
        )
|
||||
|
||||
|
||||
class TestPageEp(unittest.TestCase):
    """Checks on the URLs and paths of ``PageEp``."""

    def setUp(self):
        self.remote_ep = Endpoint("s3:/silver-bucket/")
        self.local_ep = Endpoint("local:/var/media/")

    def _make_doc_ep(self):
        """Build the ``DocumentEp`` (user 1, document 3, x.pdf) shared by
        every test in this class."""
        return DocumentEp(
            remote_endpoint=self.remote_ep,
            local_endpoint=self.local_ep,
            user_id=1,
            document_id=3,
            file_name="x.pdf"
        )

    def test_versioned_page_ep(self):
        doc_ep = self._make_doc_ep()
        # Bump the document's version before the page endpoint is built.
        doc_ep.inc_version()

        page_ep = PageEp(document_ep=doc_ep, page_num=1, page_count=3)

        self.assertEqual(
            page_ep.url(),
            "/var/media/results/user_1/document_3/v1/pages/page_1.txt"
        )

    def test_txt_url(self):
        """
        Without any arguments
        page_ep.url() returns page_ep.txt_url()
        """
        page_ep = PageEp(
            document_ep=self._make_doc_ep(),
            page_num=1,
            step=Step(1),
            page_count=3
        )

        self.assertEqual(page_ep.url(), page_ep.txt_url())

    def test_ppmroot(self):
        page_url = PageEp(
            document_ep=self._make_doc_ep(),
            page_num=1,
            step=Step(1),
            page_count=3
        )
        expected_root = (
            "/var/media/results/user_1/"
            "document_3/pages/page_1/100/page"
        )

        self.assertEqual(page_url.ppmroot, expected_root)
|
||||
|
||||
# def test_hocr_exists(self):
|
||||
# local_media = os.path.join(
|
||||
# os.path.dirname(os.path.dirname(__file__)),
|
||||
# "test",
|
||||
# "media"
|
||||
# )
|
||||
# remote_ep = Endpoint("s3:/test-papermerge/")
|
||||
# local_ep = Endpoint(f"local:{local_media}")
|
||||
# doc_ep = DocumentEp(
|
||||
# remote_endpoint=remote_ep,
|
||||
# local_endpoint=local_ep,
|
||||
# user_id=1,
|
||||
# document_id=3,
|
||||
# file_name="x.pdf"
|
||||
# )
|
||||
# page_ep1 = PageEp(
|
||||
# document_ep=doc_ep,
|
||||
# page_num=1,
|
||||
# step=Step(1),
|
||||
# page_count=3
|
||||
# )
|
||||
# self.assertTrue(
|
||||
# page_ep1.hocr_exists()
|
||||
# )
|
||||
# page_ep2 = PageEp(
|
||||
# document_ep=doc_ep,
|
||||
# page_num=2,
|
||||
# step=Step(1),
|
||||
# page_count=3
|
||||
# )
|
||||
# self.assertFalse(
|
||||
# page_ep2.hocr_exists()
|
||||
# )
|
||||
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
import unittest
|
||||
from pmworker.step import Step
|
||||
|
||||
|
||||
class TestStep(unittest.TestCase):
    """Checks that ``Step(1)`` is not reported as a thumbnail."""

    # NOTE(review): this module imports Step from ``pmworker.step``; confirm
    # whether it should come from ``mglib.step`` now that steps live in mglib.

    def test_step(self):
        step = Step(1)
        failure_message = f"{step} is is_thumbnail, but it should not be!"

        self.assertFalse(step.is_thumbnail, failure_message)
|
Loading…
Reference in New Issue