aboutsummaryrefslogtreecommitdiff
path: root/tests/incite/test_collection_base.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/incite/test_collection_base.py')
-rw-r--r--tests/incite/test_collection_base.py318
1 files changed, 318 insertions, 0 deletions
diff --git a/tests/incite/test_collection_base.py b/tests/incite/test_collection_base.py
new file mode 100644
index 0000000..497e5ab
--- /dev/null
+++ b/tests/incite/test_collection_base.py
@@ -0,0 +1,318 @@
+from datetime import datetime, timezone, timedelta
+from os.path import exists as pexists, join as pjoin
+from pathlib import Path
+from uuid import uuid4
+
+import numpy as np
+import pandas as pd
+import pytest
+from _pytest._code.code import ExceptionInfo
+
+from generalresearch.incite.base import CollectionBase
+from test_utils.incite.conftest import mnt_filepath
+
# Reference instants shared by the tests below. All three are derived from a
# single second-resolution "now" so the gaps between them are exact (45 minutes
# and 1 hour); separate now() calls could straddle a second boundary.
# Microseconds are dropped because the tests below expect CollectionBase to
# reject a start that carries them.
_NOW = datetime.now(tz=timezone.utc).replace(microsecond=0)
AGO_15min = _NOW - timedelta(minutes=15)
AGO_1HR = _NOW - timedelta(hours=1)
AGO_2HR = _NOW - timedelta(hours=2)
+
+
class TestCollectionBase:
    """Constructor validation of ``CollectionBase``.

    Covers the ``df``, ``start``, ``archive_path`` and ``offset`` arguments.
    Every test relies on the ``mnt_filepath`` fixture, whose ``data_src``
    attribute is used as the archive directory.
    """

    def test_init(self, mnt_filepath):
        """With only an archive path given, the instance holds an empty frame."""
        instance = CollectionBase(archive_path=mnt_filepath.data_src)
        assert instance.df.empty is True

    def test_init_df(self, mnt_filepath):
        """Only an empty ``pd.DataFrame`` is accepted for ``df``."""
        # Only an empty pd.DataFrame can ever be provided
        instance = CollectionBase(
            df=pd.DataFrame({}), archive_path=mnt_filepath.data_src
        )
        assert isinstance(instance.df, pd.DataFrame)

        # Columns but no rows is still rejected
        with pytest.raises(expected_exception=ValueError) as cm:
            cm: ExceptionInfo
            CollectionBase(
                df=pd.DataFrame(columns=[0, 1, 2]), archive_path=mnt_filepath.data_src
            )
        assert "Do not provide a pd.DataFrame" in str(cm.value)

        # A populated frame is rejected with the same message
        with pytest.raises(expected_exception=ValueError) as cm:
            cm: ExceptionInfo
            CollectionBase(
                df=pd.DataFrame(np.random.randint(100, size=(1000, 1)), columns=["A"]),
                archive_path=mnt_filepath.data_src,
            )
        assert "Do not provide a pd.DataFrame" in str(cm.value)

    def test_init_start(self, mnt_filepath):
        """``start`` must be UTC, free of microseconds, and older than ``offset``."""
        # datetime.now() carries microseconds, which are not allowed
        with pytest.raises(expected_exception=ValueError) as cm:
            cm: ExceptionInfo
            CollectionBase(
                start=datetime.now(tz=timezone.utc) - timedelta(days=10),
                archive_path=mnt_filepath.data_src,
            )
        assert "Collection.start must not have microseconds" in str(cm.value)

        # Setup stays outside the raises block so only the constructor call
        # can satisfy the expectation.
        tz = timezone(timedelta(hours=-5), "EST")
        with pytest.raises(expected_exception=ValueError) as cm:
            cm: ExceptionInfo
            CollectionBase(
                start=datetime(year=2000, month=1, day=1, tzinfo=tz),
                archive_path=mnt_filepath.data_src,
            )
        assert "Timezone is not UTC" in str(cm.value)

        # When start is omitted it defaults to 2018-01-01 UTC
        instance = CollectionBase(archive_path=mnt_filepath.data_src)
        assert instance.start == datetime(
            year=2018, month=1, day=1, tzinfo=timezone.utc
        )

        # A 3h offset does not fit into a collection that started 2h ago
        with pytest.raises(expected_exception=ValueError) as cm:
            cm: ExceptionInfo
            CollectionBase(
                start=AGO_2HR, offset="3h", archive_path=mnt_filepath.data_src
            )
        assert "Offset must be equal to, or smaller the start timestamp" in str(
            cm.value
        )

    def test_init_archive_path(self, mnt_filepath):
        """DirectoryPath is apparently smart enough to confirm that the
        directory path exists.
        """

        # (1) Basic, confirm an existing path works
        instance = CollectionBase(archive_path=mnt_filepath.data_src)
        assert instance.archive_path == mnt_filepath.data_src

        # (2) A file-looking path is rejected. NOTE: the file is never
        # created, so this exercises a missing path with a file extension
        # rather than an existing regular file.
        file_path = Path(pjoin(mnt_filepath.data_src, f"{uuid4().hex}.zip"))
        assert not pexists(file_path)
        with pytest.raises(expected_exception=ValueError) as cm:
            cm: ExceptionInfo
            CollectionBase(archive_path=file_path)
        assert "Path does not point to a directory" in str(cm.value)

        # (3) It doesn't create the directory if it doesn't exist
        new_path = Path(pjoin(mnt_filepath.data_src, f"{uuid4().hex}/"))
        assert not pexists(new_path)
        with pytest.raises(expected_exception=ValueError) as cm:
            cm: ExceptionInfo
            CollectionBase(archive_path=new_path)
        assert "Path does not point to a directory" in str(cm.value)

    def test_init_offset(self, mnt_filepath):
        """``offset`` must be a valid alias, at least 1 min, at most 5 characters."""
        # Not a recognised offset alias
        with pytest.raises(expected_exception=ValueError) as cm:
            cm: ExceptionInfo
            CollectionBase(offset="1:X", archive_path=mnt_filepath.data_src)
        assert "Invalid offset alias provided. Please review:" in str(cm.value)

        # One second short of the minimum
        with pytest.raises(expected_exception=ValueError) as cm:
            cm: ExceptionInfo
            CollectionBase(offset="59sec", archive_path=mnt_filepath.data_src)
        assert "Must be equal to, or longer than 1 min" in str(cm.value)

        # 365 * 101 renders as "36865d": six characters, one over the limit
        with pytest.raises(expected_exception=ValueError) as cm:
            cm: ExceptionInfo
            CollectionBase(offset=f"{365 * 101}d", archive_path=mnt_filepath.data_src)
        assert "String should have at most 5 characters" in str(cm.value)
+
+
class TestCollectionBaseProperties:
    """Property behaviour of a bare ``CollectionBase``.

    ``items`` and ``progress`` are expected to raise ``NotImplementedError``
    ("Must override") on the base class; the interval helpers are exercised
    directly. Instances are always built outside the ``pytest.raises`` blocks
    so that only the property access itself can satisfy the expectation.
    """

    def test_items(self, mnt_filepath):
        """Reading ``items`` on the base class is not implemented."""
        instance = CollectionBase(archive_path=mnt_filepath.data_src)
        with pytest.raises(expected_exception=NotImplementedError) as cm:
            cm: ExceptionInfo
            _ = instance.items
        assert "Must override" in str(cm.value)

    def test_interval_range(self, mnt_filepath):
        """``_interval_range`` validates ``end`` and returns a tidy IntervalIndex."""
        instance = CollectionBase(archive_path=mnt_filepath.data_src)
        # Private method requires the end parameter
        with pytest.raises(expected_exception=AssertionError) as cm:
            cm: ExceptionInfo
            instance._interval_range(end=None)
        assert "an end value must be provided" in str(cm.value)

        # End param must be same as started (which forces utc)
        tz = timezone(timedelta(hours=-5), "EST")
        with pytest.raises(expected_exception=AssertionError) as cm:
            cm: ExceptionInfo
            instance._interval_range(end=datetime.now(tz=tz))
        assert "Timezones must match" in str(cm.value)

        res = instance._interval_range(end=datetime.now(tz=timezone.utc))
        assert isinstance(res, pd.IntervalIndex)
        assert res.closed_left
        assert res.is_non_overlapping_monotonic
        assert res.is_monotonic_increasing
        assert res.is_unique

    def test_interval_range2(self, mnt_filepath):
        """The public ``interval_range`` is a list whose length follows the offset.

        NOTE(review): the expected counts assume the test runs shortly after
        ``AGO_1HR`` was computed at import time - confirm this holds for long
        test sessions.
        """
        instance = CollectionBase(archive_path=mnt_filepath.data_src)
        assert isinstance(instance.interval_range, list)

        # 1 hrs ago has 2 x 30min + the future 30min
        instance = CollectionBase(
            start=AGO_1HR, offset="30min", archive_path=mnt_filepath.data_src
        )
        assert len(instance.interval_range) == 3
        assert instance.interval_range[0][0] == AGO_1HR

        # 1 hrs ago has 1 x 60min + the future 60min
        instance = CollectionBase(
            start=AGO_1HR, offset="60min", archive_path=mnt_filepath.data_src
        )
        assert len(instance.interval_range) == 2

    def test_progress(self, mnt_filepath):
        """Reading ``progress`` on the base class is not implemented."""
        instance = CollectionBase(
            start=AGO_15min, offset="3min", archive_path=mnt_filepath.data_src
        )
        with pytest.raises(expected_exception=NotImplementedError) as cm:
            cm: ExceptionInfo
            _ = instance.progress
        assert "Must override" in str(cm.value)

    def test_progress2(self, mnt_filepath):
        """Same as ``test_progress`` with a longer window, after checking ``df``."""
        instance = CollectionBase(
            start=AGO_2HR,
            offset="15min",
            archive_path=mnt_filepath.data_src,
        )
        assert instance.df.empty

        with pytest.raises(expected_exception=NotImplementedError) as cm:
            _ = instance.progress
        assert "Must override" in str(cm.value)

    def test_items2(self, mnt_filepath):
        """There can't be a test for this because the Items need a path which
        isn't possible in the generic form
        """
        instance = CollectionBase(
            start=AGO_1HR, offset="5min", archive_path=mnt_filepath.data_src
        )

        with pytest.raises(expected_exception=NotImplementedError) as cm:
            cm: ExceptionInfo
            _ = instance.items
        assert "Must override" in str(cm.value)

        # Kept for reference: checks that only make sense on a concrete
        # subclass whose items resolve to real paths.
        # item = items[-3]
        # ddf = instance.ddf(items=[item], include_partial=True, force_rr_latest=False)
        # df = item.validate_ddf(ddf=ddf)
        # assert isinstance(df, pd.DataFrame)
        # assert len(df.columns) == 16
        # assert str(df.product_id.dtype) == "object"
        # assert str(ddf.product_id.dtype) == "string"

    def test_items3(self, mnt_filepath):
        """Indexing into ``items`` fails the same way as reading it."""
        instance = CollectionBase(
            start=AGO_2HR,
            offset="15min",
            archive_path=mnt_filepath.data_src,
        )
        with pytest.raises(expected_exception=NotImplementedError) as cm:
            _ = instance.items[0]
        assert "Must override" in str(cm.value)
+
+
class TestCollectionBaseMethodsFetch:
    """Fetch helpers of a bare ``CollectionBase`` must be overridden.

    This class was previously named ``TestCollectionBaseMethodsCleanup``, the
    same name as the class defined further down in this module. The later
    definition rebound the module-level name, so pytest never collected the
    tests in this class. The distinct name makes them run.
    """

    def test_fetch_force_rr_latest(self, mnt_filepath):
        """``fetch_force_rr_latest`` raises with a "Must override" message."""
        coll = CollectionBase(archive_path=mnt_filepath.data_src)

        # NOTE(review): this expects the broad ``Exception`` while the sibling
        # tests expect NotImplementedError - narrow it once the raised type is
        # confirmed.
        with pytest.raises(expected_exception=Exception) as cm:
            cm: ExceptionInfo
            coll.fetch_force_rr_latest(sources=[])
        assert "Must override" in str(cm.value)

    def test_fetch_all_paths(self, mnt_filepath):
        """``fetch_all_paths`` is not implemented on the base class."""
        coll = CollectionBase(archive_path=mnt_filepath.data_src)

        with pytest.raises(expected_exception=NotImplementedError) as cm:
            cm: ExceptionInfo
            coll.fetch_all_paths(
                items=None, force_rr_latest=False, include_partial=False
            )
        assert "Must override" in str(cm.value)
+
+
class TestCollectionBaseMethodsCleanup:
    """Smoke tests for the maintenance helpers of ``CollectionBase``.

    Each helper is called on a default instance and asserted to return
    ``None``. Three of the four tests are currently skipped.
    """

    @pytest.mark.skip
    def test_cleanup_partials(self, mnt_filepath):
        coll = CollectionBase(archive_path=mnt_filepath.data_src)
        outcome = coll.cleanup_partials()
        # it doesn't return anything
        assert outcome is None

    def test_clear_tmp_archives(self, mnt_filepath):
        coll = CollectionBase(archive_path=mnt_filepath.data_src)
        outcome = coll.clear_tmp_archives()
        # it doesn't return anything
        assert outcome is None

    @pytest.mark.skip
    def test_clear_corrupt_archives(self, mnt_filepath):
        """TODO: expand this so it actually has corrupt archives that we
        check to see if they're removed
        """
        coll = CollectionBase(archive_path=mnt_filepath.data_src)
        outcome = coll.clear_corrupt_archives()
        # it doesn't return anything
        assert outcome is None

    @pytest.mark.skip
    def test_rebuild_symlinks(self, mnt_filepath):
        coll = CollectionBase(archive_path=mnt_filepath.data_src)
        outcome = coll.rebuild_symlinks()
        assert outcome is None
+
+
class TestCollectionBaseMethodsSourceTiming:
    """Item lookup helpers of a bare ``CollectionBase``.

    Every ``get_item*`` method is expected to raise ``NotImplementedError``
    with a "Must override" message on the base class.
    """

    def test_get_item(self, mnt_filepath):
        coll = CollectionBase(archive_path=mnt_filepath.data_src)
        window = pd.Interval(left=1, right=2, closed="left")

        with pytest.raises(expected_exception=NotImplementedError) as cm:
            coll.get_item(interval=window)
        message = str(cm.value)
        assert "Must override" in message

    def test_get_item_start(self, mnt_filepath):
        coll = CollectionBase(archive_path=mnt_filepath.data_src)

        # Lookup by a pandas timestamp built from the current UTC time
        moment = pd.Timestamp(datetime.now(tz=timezone.utc))

        with pytest.raises(expected_exception=NotImplementedError) as cm:
            coll.get_item_start(start=moment)
        message = str(cm.value)
        assert "Must override" in message

    def test_get_items(self, mnt_filepath):
        coll = CollectionBase(archive_path=mnt_filepath.data_src)

        current = datetime.now(tz=timezone.utc)

        with pytest.raises(expected_exception=NotImplementedError) as cm:
            coll.get_items(since=current)
        message = str(cm.value)
        assert "Must override" in message

    def test_get_items_from_year(self, mnt_filepath):
        coll = CollectionBase(archive_path=mnt_filepath.data_src)

        with pytest.raises(expected_exception=NotImplementedError) as cm:
            coll.get_items_from_year(year=2020)
        message = str(cm.value)
        assert "Must override" in message

    def test_get_items_last90(self, mnt_filepath):
        coll = CollectionBase(archive_path=mnt_filepath.data_src)

        with pytest.raises(expected_exception=NotImplementedError) as cm:
            coll.get_items_last90()
        message = str(cm.value)
        assert "Must override" in message

    def test_get_items_last365(self, mnt_filepath):
        coll = CollectionBase(archive_path=mnt_filepath.data_src)

        with pytest.raises(expected_exception=NotImplementedError) as cm:
            coll.get_items_last365()
        message = str(cm.value)
        assert "Must override" in message