moved endpoint and steps into mglib

pull/3/head
Eugen Ciur 2020-05-04 12:35:08 +02:00
parent 32f58fe8ed
commit 024f47dc20
5 changed files with 393 additions and 0 deletions

45
mglib/step.py Normal file
View File

@ -0,0 +1,45 @@
class Step:
    """A single zoom step, identified by an index into ``Step.LIST``.

    ``current`` selects the entry of ``Step.LIST`` that is used as this
    step's percentage; the rendering width is derived from that percentage.
    """

    # Width of a document when displayed at 100%.
    WIDTH_100p = 1240
    PERCENT = 100
    # Percentages addressed by the step index.
    LIST = [125, 100, 75, 50, 10]
    # The aspect ratio of A4 paper is h = w * 1.41, so for example:
    #   100 => w = 1240, h = 1748
    #    50 => w = 620,  h = 874

    def __init__(self, current=1):
        # Index into Step.LIST; the default of 1 selects the 100% entry.
        self.current = current

    @property
    def percent(self):
        """Percentage stored in ``Step.LIST`` at index ``current``."""
        return Step.LIST[self.current]

    @property
    def width(self):
        """Width for this step: ``WIDTH_100p`` scaled by the percentage."""
        return int(self.percent / 100 * Step.WIDTH_100p)

    @property
    def is_thumbnail(self):
        """True when the percentage is below 50."""
        return self.percent < 50

    @property
    def is_for_hocr(self):
        """True for every step that is not a thumbnail."""
        return not self.is_thumbnail

    def __str__(self):
        return f"Step(percent={self.percent}, width={self.width})"

    def __repr__(self):
        # The debugging representation is the same text as str().
        return str(self)
class Steps:
    """Iterable collection holding one ``Step`` per entry of ``Step.LIST``."""

    def __init__(self):
        # Derive the indices from Step.LIST instead of spelling out 0..4, so
        # this collection keeps covering every step if the list changes.
        self.steps = [Step(index) for index in range(len(Step.LIST))]

    def __iter__(self):
        """Return an iterator over the ``Step`` instances in index order."""
        return iter(self.steps)

10
mglib/storage.py Normal file
View File

@ -0,0 +1,10 @@
class Storage:
    """
    Storage class which works with Endpoints
    """

    def delete(self, ep):
        """Placeholder: accepts ``ep``, performs no action, returns None."""

23
test/run.py Normal file
View File

@ -0,0 +1,23 @@
import os
import sys
import argparse
from unittest.loader import TestLoader
from unittest.runner import TextTestRunner
# Directory that contains this script; test discovery starts from here.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

test_loader = TestLoader()
test_runner = TextTestRunner()

# NOTE(review): the parser is created but no arguments are declared or parsed
# in the visible code.
parser = argparse.ArgumentParser()

tests = test_loader.discover(start_dir=BASE_DIR)
result = test_runner.run(tests)

# Report a failed run to the calling process through a non-zero exit status.
if not result.wasSuccessful():
    sys.exit(1)

303
test/test_endpoint.py Normal file
View File

@ -0,0 +1,303 @@
import unittest
import os
from mglib.endpoint import (
Endpoint, DocumentEp, PageEp,
get_bucketname, get_keyname
)
from mglib.step import Step
class TestOthers(unittest.TestCase):
    """Tests for the ``get_bucketname`` and ``get_keyname`` helpers."""

    def test_getbucketname(self):
        """Each URL spelling below yields the bucket name "my-bucket"."""
        urls = (
            "s3://my-bucket/some/path/to/x.pdf",
            "s3:/my-bucket/some/path/to/x.pdf",
            "s3:/my-bucket/",
            "s3:/my-bucket",
        )
        for url in urls:
            self.assertEqual(get_bucketname(url), "my-bucket")

    def test_getkeyname(self):
        """The key is the path after the bucket, with one or two slashes."""
        urls = (
            "s3://my-bucket/some/path/to/x.pdf",
            "s3:/my-bucket/some/path/to/x.pdf",
        )
        for url in urls:
            self.assertEqual(get_keyname(url), "some/path/to/x.pdf")
class TestEndpoint(unittest.TestCase):
    """Tests for ``Endpoint`` built from "s3:" and "local:" URLs."""

    def test_s3_bucketname(self):
        """An "s3:" URL is flagged as S3 (not local) and exposes its bucket."""
        endpoint = Endpoint("s3:/constellation/")

        self.assertTrue(endpoint.is_s3)
        self.assertFalse(endpoint.is_local)
        self.assertEqual(endpoint.bucketname, "constellation")

    def test_s3_bucketname_no_slash(self):
        """The bucket name is found without a trailing slash in the URL."""
        endpoint = Endpoint("s3:/kakamaka")

        self.assertEqual(endpoint.bucketname, "kakamaka")

    def test_local(self):
        """``dirname`` of a local endpoint ends with a slash."""
        endpoint = Endpoint("local:/var/media/files")

        self.assertEqual(endpoint.dirname, "/var/media/files/")

    def test_repr(self):
        """Formatting an endpoint into a string shows its URL."""
        endpoint = Endpoint("s3:/bucket/")

        self.assertEqual(format(endpoint), "Endpoint(s3:/bucket/)")
class TestDocumentEp(unittest.TestCase):
    """Tests for ``DocumentEp`` keys, URLs, versioning and directory names."""

    def setUp(self):
        self.remote_ep = Endpoint("s3:/silver-bucket/")
        self.local_ep = Endpoint("local:/var/media/")

    def _make_doc_ep(self, **overrides):
        """Build a ``DocumentEp`` for user 1 / document 3 on the fixtures.

        Keyword arguments replace or extend the default constructor
        arguments (``file_name`` defaults to "x.pdf").
        """
        kwargs = {
            "remote_endpoint": self.remote_ep,
            "local_endpoint": self.local_ep,
            "user_id": 1,
            "document_id": 3,
            "file_name": "x.pdf",
        }
        kwargs.update(overrides)
        return DocumentEp(**kwargs)

    def test_document_url_key(self):
        doc_ep = self._make_doc_ep(file_name="contract.pdf")

        self.assertEqual(doc_ep.bucketname, "silver-bucket")
        self.assertEqual(doc_ep.key, "docs/user_1/document_3/contract.pdf")

    def test_document_url(self):
        doc_ep = self._make_doc_ep()

        self.assertEqual(
            doc_ep.url(ep=Endpoint.S3),
            "s3:/silver-bucket/docs/user_1/document_3/x.pdf"
        )
        self.assertEqual(
            doc_ep.url(ep=Endpoint.LOCAL),
            "/var/media/docs/user_1/document_3/x.pdf"
        )

    def test_empty_tenant(self):
        """
        With no tenant specified - url to document will
        be without tenant.
        """
        doc_ep = self._make_doc_ep()

        self.assertEqual(
            doc_ep.url(),
            "/var/media/docs/user_1/document_3/x.pdf"
        )

    def test_inc_version(self):
        """
        Document endpoints are versioned, starting at version 0.

        While the version is 0 the "old" endpoint path applies, i.e. the
        version is not part of the path. After the document is modified
        (a blank page deleted, for example) its version is incremented,
        and any version > 0 is included in the path.
        """
        doc_ep = self._make_doc_ep()

        # One (local url, s3 url) pair per successive inc_version() call.
        expected_urls = (
            (
                "/var/media/docs/user_1/document_3/v1/x.pdf",
                "s3:/silver-bucket/docs/user_1/document_3/v1/x.pdf",
            ),
            (
                "/var/media/docs/user_1/document_3/v2/x.pdf",
                "s3:/silver-bucket/docs/user_1/document_3/v2/x.pdf",
            ),
        )
        for local_url, s3_url in expected_urls:
            doc_ep.inc_version()
            self.assertEqual(doc_ep.url(), local_url)
            self.assertEqual(doc_ep.url(ep=Endpoint.S3), s3_url)

    def test_dirname(self):
        doc_ep = self._make_doc_ep(aux_dir="results")

        self.assertEqual(
            doc_ep.dirname,
            "/var/media/results/user_1/document_3/"
        )

    def test_pages_dirname(self):
        doc_ep = self._make_doc_ep(aux_dir="results")

        self.assertEqual(
            doc_ep.pages_dirname,
            "/var/media/results/user_1/document_3/pages/"
        )
class TestPageEp(unittest.TestCase):
    """Tests for ``PageEp`` URLs derived from a ``DocumentEp``."""

    def setUp(self):
        self.remote_ep = Endpoint("s3:/silver-bucket/")
        self.local_ep = Endpoint("local:/var/media/")

    def _make_doc_ep(self):
        """Build the ``DocumentEp`` (user 1, document 3, x.pdf) used below."""
        return DocumentEp(
            remote_endpoint=self.remote_ep,
            local_endpoint=self.local_ep,
            user_id=1,
            document_id=3,
            file_name="x.pdf"
        )

    def test_versioned_page_ep(self):
        """After a version increment the page url contains "v1"."""
        doc_ep = self._make_doc_ep()
        # document's version incremented
        doc_ep.inc_version()

        page_ep = PageEp(document_ep=doc_ep, page_num=1, page_count=3)

        self.assertEqual(
            page_ep.url(),
            "/var/media/results/user_1/document_3/v1/pages/page_1.txt"
        )

    def test_txt_url(self):
        """
        Without any arguments
        page_ep.url() returns page_ep.txt_url()
        """
        page_ep = PageEp(
            document_ep=self._make_doc_ep(),
            page_num=1,
            step=Step(1),
            page_count=3
        )

        self.assertEqual(page_ep.url(), page_ep.txt_url())

    def test_ppmroot(self):
        page_ep = PageEp(
            document_ep=self._make_doc_ep(),
            page_num=1,
            step=Step(1),
            page_count=3
        )
        expected_root = (
            "/var/media/results/user_1/"
            "document_3/pages/page_1/100/page"
        )

        self.assertEqual(page_ep.ppmroot, expected_root)

    # Disabled test, kept for reference. It builds the local endpoint from a
    # "test/media" directory and checks PageEp.hocr_exists().
    #
    # def test_hocr_exists(self):
    #     local_media = os.path.join(
    #         os.path.dirname(os.path.dirname(__file__)),
    #         "test",
    #         "media"
    #     )
    #     remote_ep = Endpoint("s3:/test-papermerge/")
    #     local_ep = Endpoint(f"local:{local_media}")
    #     doc_ep = DocumentEp(
    #         remote_endpoint=remote_ep,
    #         local_endpoint=local_ep,
    #         user_id=1,
    #         document_id=3,
    #         file_name="x.pdf"
    #     )
    #     page_ep1 = PageEp(
    #         document_ep=doc_ep,
    #         page_num=1,
    #         step=Step(1),
    #         page_count=3
    #     )
    #     self.assertTrue(
    #         page_ep1.hocr_exists()
    #     )
    #     page_ep2 = PageEp(
    #         document_ep=doc_ep,
    #         page_num=2,
    #         step=Step(1),
    #         page_count=3
    #     )
    #     self.assertFalse(
    #         page_ep2.hocr_exists()
    #     )

12
test/test_step.py Normal file
View File

@ -0,0 +1,12 @@
import unittest
from pmworker.step import Step
class TestStep(unittest.TestCase):
    """Sanity check for ``Step``."""

    # NOTE(review): Step is imported from ``pmworker.step`` at the top of this
    # file, while test_endpoint.py imports it from ``mglib.step`` - confirm
    # which package is intended.

    def test_step(self):
        """The step built with index 1 must not be reported as a thumbnail."""
        step = Step(1)

        thumbnail = step.is_thumbnail
        message = f"{step} is is_thumbnail, but it should not be!"

        self.assertFalse(thumbnail, message)