From 024f47dc20e9148bc13b2f401ad2915a8c7342e9 Mon Sep 17 00:00:00 2001
From: Eugen Ciur
Date: Mon, 4 May 2020 12:35:08 +0200
Subject: [PATCH] moved endpoint and steps into mglib

---
 mglib/step.py         |  45 +++++++
 mglib/storage.py      |  10 ++
 test/run.py           |  23 ++++
 test/test_endpoint.py | 303 ++++++++++++++++++++++++++++++++++++++++++
 test/test_step.py     |  12 ++
 5 files changed, 393 insertions(+)
 create mode 100644 mglib/step.py
 create mode 100644 mglib/storage.py
 create mode 100644 test/run.py
 create mode 100644 test/test_endpoint.py
 create mode 100644 test/test_step.py

diff --git a/mglib/step.py b/mglib/step.py
new file mode 100644
index 0000000..70a6fec
--- /dev/null
+++ b/mglib/step.py
@@ -0,0 +1,45 @@
+class Step:
+    """Display size of a document, as an index into LIST of percentages."""
+    # Width of a document when displayed at 100%.
+    WIDTH_100p = 1240
+    PERCENT = 100
+    LIST = [125, 100, 75, 50, 10]
+
+    # Aspect ratio of A4 paper is h = w * 1.41,
+    # so for a given percent:
+    # 100 => w = 1240, h = 1748
+    # 50 => w = 620, h = 874
+
+    def __init__(self, current=1):
+        self.current = current
+
+    @property
+    def width(self):
+        p = self.percent / Step.PERCENT
+        return int(p * Step.WIDTH_100p)
+
+    @property
+    def is_thumbnail(self):
+        return self.percent < 50
+
+    @property
+    def is_for_hocr(self):
+        return not self.is_thumbnail
+
+    @property
+    def percent(self):
+        return Step.LIST[self.current]
+
+    def __str__(self):
+        return f"Step(percent={self.percent}, width={self.width})"
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class Steps:
+    def __init__(self):
+        self.steps = [Step(0), Step(1), Step(2), Step(3), Step(4)]
+
+    def __iter__(self):
+        return iter(self.steps)
diff --git a/mglib/storage.py b/mglib/storage.py
new file mode 100644
index 0000000..c85fc5a
--- /dev/null
+++ b/mglib/storage.py
@@ -0,0 +1,10 @@
+
+class Storage:
+    """
+    Storage class which works with Endpoints
+    """
+
+    def delete(self, ep):
+        pass
+
+
diff --git a/test/run.py b/test/run.py
new file mode 100644
index 0000000..5a412c6
--- /dev/null
+++ b/test/run.py
@@ -0,0 +1,23 @@
+import os
+import sys
+import argparse
+from unittest.loader import TestLoader
+from unittest.runner import TextTestRunner
+
+BASE_DIR = os.path.dirname(
+    os.path.abspath(__file__)
+)
+
+test_loader = TestLoader()
+test_runner = TextTestRunner()
+
+parser = argparse.ArgumentParser()
+
+tests = test_loader.discover(
+    start_dir=BASE_DIR,
+)
+
+result = test_runner.run(tests)
+
+if not result.wasSuccessful():
+    sys.exit(1)
diff --git a/test/test_endpoint.py b/test/test_endpoint.py
new file mode 100644
index 0000000..19355f1
--- /dev/null
+++ b/test/test_endpoint.py
@@ -0,0 +1,303 @@
+import unittest
+import os
+
+from mglib.endpoint import (
+    Endpoint, DocumentEp, PageEp,
+    get_bucketname, get_keyname
+)
+from mglib.step import Step
+
+
+class TestOthers(unittest.TestCase):
+
+    def test_getbucketname(self):
+        self.assertEqual(
+            get_bucketname("s3://my-bucket/some/path/to/x.pdf"),
+            "my-bucket"
+        )
+        self.assertEqual(
+            get_bucketname("s3:/my-bucket/some/path/to/x.pdf"),
+            "my-bucket"
+        )
+        self.assertEqual(
+            get_bucketname("s3:/my-bucket/"),
+            "my-bucket"
+        )
+        self.assertEqual(
+            get_bucketname("s3:/my-bucket"),
+            "my-bucket"
+        )
+
+    def test_getkeyname(self):
+        self.assertEqual(
+            get_keyname("s3://my-bucket/some/path/to/x.pdf"),
+            "some/path/to/x.pdf"
+        )
+        self.assertEqual(
+            get_keyname("s3:/my-bucket/some/path/to/x.pdf"),
+            "some/path/to/x.pdf"
+        )
+
+
+class TestEndpoint(unittest.TestCase):
+
+    def test_s3_bucketname(self):
+        ep = Endpoint("s3:/constellation/")
+        self.assertTrue(ep.is_s3)
+        self.assertFalse(ep.is_local)
+        self.assertEqual(
+            ep.bucketname,
+            "constellation"
+        )
+
+    def test_s3_bucketname_no_slash(self):
+        ep = Endpoint("s3:/kakamaka")
+        self.assertEqual(
+            ep.bucketname,
+            "kakamaka"
+        )
+
+    def test_local(self):
+        ep = Endpoint("local:/var/media/files")
+        self.assertEqual(
+            ep.dirname,
+            "/var/media/files/"
+        )
+
+    def test_repr(self):
+        ep = Endpoint("s3:/bucket/")
+        self.assertEqual(
+            f"{ep}",
+            "Endpoint(s3:/bucket/)"
+        )
+
+
+class TestDocumentEp(unittest.TestCase):
+
+    def setUp(self):
+        self.remote_ep = Endpoint("s3:/silver-bucket/")
+        self.local_ep = Endpoint("local:/var/media/")
+
+    def test_document_url_key(self):
+        doc_ep = DocumentEp(
+            remote_endpoint=self.remote_ep,
+            local_endpoint=self.local_ep,
+            user_id=1,
+            document_id=3,
+            file_name="contract.pdf"
+        )
+        self.assertEqual(
+            doc_ep.bucketname,
+            "silver-bucket"
+        )
+        self.assertEqual(
+            doc_ep.key,
+            "docs/user_1/document_3/contract.pdf"
+        )
+
+    def test_document_url(self):
+        doc_ep = DocumentEp(
+            remote_endpoint=self.remote_ep,
+            local_endpoint=self.local_ep,
+            user_id=1,
+            document_id=3,
+            file_name="x.pdf"
+        )
+        self.assertEqual(
+            doc_ep.url(ep=Endpoint.S3),
+            "s3:/silver-bucket/docs/user_1/document_3/x.pdf"
+        )
+        self.assertEqual(
+            doc_ep.url(ep=Endpoint.LOCAL),
+            "/var/media/docs/user_1/document_3/x.pdf"
+        )
+
+    def test_empty_tenant(self):
+        """
+        With no tenant specified - url to document will
+        be without tenant.
+        """
+        doc_ep = DocumentEp(
+            remote_endpoint=self.remote_ep,
+            local_endpoint=self.local_ep,
+            user_id=1,
+            document_id=3,
+            file_name="x.pdf"
+        )
+        self.assertEqual(
+            doc_ep.url(),
+            "/var/media/docs/user_1/document_3/x.pdf"
+        )
+
+    def test_inc_version(self):
+        """
+        Document endpoints are now versioned.
+        Initial version is 0.
+        When version is 0, the "old" endpoint path applies i.e.
+        version is not included in the path.
+        After document is modified (blank page deleted for example),
+        its version is incremented. If document version is > 0, then
+        version is included in the path.
+        """
+        doc_ep = DocumentEp(
+            remote_endpoint=self.remote_ep,
+            local_endpoint=self.local_ep,
+            user_id=1,
+            document_id=3,
+            file_name="x.pdf"
+        )
+        doc_ep.inc_version()
+
+        self.assertEqual(
+            doc_ep.url(),
+            "/var/media/docs/user_1/document_3/v1/x.pdf"
+        )
+        self.assertEqual(
+            doc_ep.url(ep=Endpoint.S3),
+            "s3:/silver-bucket/docs/user_1/document_3/v1/x.pdf"
+        )
+
+        doc_ep.inc_version()
+
+        self.assertEqual(
+            doc_ep.url(),
+            "/var/media/docs/user_1/document_3/v2/x.pdf"
+        )
+        self.assertEqual(
+            doc_ep.url(ep=Endpoint.S3),
+            "s3:/silver-bucket/docs/user_1/document_3/v2/x.pdf"
+        )
+
+    def test_dirname(self):
+        ep = DocumentEp(
+            remote_endpoint=self.remote_ep,
+            local_endpoint=self.local_ep,
+            user_id=1,
+            document_id=3,
+            aux_dir="results",
+            file_name="x.pdf"
+        )
+        self.assertEqual(
+            ep.dirname,
+            "/var/media/results/user_1/document_3/"
+        )
+
+    def test_pages_dirname(self):
+        ep = DocumentEp(
+            remote_endpoint=self.remote_ep,
+            local_endpoint=self.local_ep,
+            user_id=1,
+            document_id=3,
+            aux_dir="results",
+            file_name="x.pdf"
+        )
+        self.assertEqual(
+            ep.pages_dirname,
+            "/var/media/results/user_1/document_3/pages/"
+        )
+
+
+class TestPageEp(unittest.TestCase):
+    def setUp(self):
+        self.remote_ep = Endpoint("s3:/silver-bucket/")
+        self.local_ep = Endpoint("local:/var/media/")
+
+    def test_versioned_page_ep(self):
+        doc_ep = DocumentEp(
+            remote_endpoint=self.remote_ep,
+            local_endpoint=self.local_ep,
+            user_id=1,
+            document_id=3,
+            file_name="x.pdf"
+        )
+        # document's version incremented
+        doc_ep.inc_version()
+
+        page_ep = PageEp(
+            document_ep=doc_ep,
+            page_num=1,
+            page_count=3
+        )
+        self.assertEqual(
+            page_ep.url(),
+            "/var/media/results/user_1/document_3/v1/pages/page_1.txt"
+        )
+
+    def test_txt_url(self):
+        """
+        Without any arguments
+        page_ep.url() returns page_ep.txt_url()
+        """
+        doc_ep = DocumentEp(
+            remote_endpoint=self.remote_ep,
+            local_endpoint=self.local_ep,
+            user_id=1,
+            document_id=3,
+            file_name="x.pdf"
+        )
+        page_ep = PageEp(
+            document_ep=doc_ep,
+            page_num=1,
+            step=Step(1),
+            page_count=3
+        )
+        self.assertEqual(
+            page_ep.url(),
+            page_ep.txt_url()
+        )
+
+    def test_ppmroot(self):
+        doc_ep = DocumentEp(
+            remote_endpoint=self.remote_ep,
+            local_endpoint=self.local_ep,
+            user_id=1,
+            document_id=3,
+            file_name="x.pdf"
+        )
+        page_url = PageEp(
+            document_ep=doc_ep,
+            page_num=1,
+            step=Step(1),
+            page_count=3
+        )
+        self.assertEqual(
+            page_url.ppmroot,
+            (f"/var/media/results/user_1/"
+             f"document_3/pages/page_1/100/page")
+        )
+
+#    def test_hocr_exists(self):
+#        local_media = os.path.join(
+#            os.path.dirname(os.path.dirname(__file__)),
+#            "test",
+#            "media"
+#        )
+#        remote_ep = Endpoint("s3:/test-papermerge/")
+#        local_ep = Endpoint(f"local:{local_media}")
+#        doc_ep = DocumentEp(
+#            remote_endpoint=remote_ep,
+#            local_endpoint=local_ep,
+#            user_id=1,
+#            document_id=3,
+#            file_name="x.pdf"
+#        )
+#        page_ep1 = PageEp(
+#            document_ep=doc_ep,
+#            page_num=1,
+#            step=Step(1),
+#            page_count=3
+#        )
+#        self.assertTrue(
+#            page_ep1.hocr_exists()
+#        )
+#        page_ep2 = PageEp(
+#            document_ep=doc_ep,
+#            page_num=2,
+#            step=Step(1),
+#            page_count=3
+#        )
+#        self.assertFalse(
+#            page_ep2.hocr_exists()
+#        )
+
+
diff --git a/test/test_step.py b/test/test_step.py
new file mode 100644
index 0000000..59addd4
--- /dev/null
+++ b/test/test_step.py
@@ -0,0 +1,12 @@
+import unittest
+from mglib.step import Step
+
+
+class TestStep(unittest.TestCase):
+
+    def test_step(self):
+        step = Step(1)
+        self.assertFalse(
+            step.is_thumbnail,
+            f"{step} is is_thumbnail, but it should not be!"
+        )