aboutsummaryrefslogtreecommitdiff
path: root/tests/incite/collections/test_df_collection_base.py
blob: 5aaa7290de2d40c127ce33f11f6222dba8fc30b3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
from datetime import datetime, timezone

import pandas as pd
import pytest
from pandera import DataFrameSchema

from generalresearch.incite.collections import (
    DFCollectionType,
    DFCollection,
)
from test_utils.incite.conftest import mnt_filepath

# Every DFCollectionType member except DFCollectionType.TEST; fed to
# pytest.mark.parametrize on the test classes in this module.
df_collection_types = [
    coll_type
    for coll_type in DFCollectionType
    if coll_type is not DFCollectionType.TEST
]


@pytest.mark.parametrize("df_coll_type", df_collection_types)
class TestDFCollectionBase:
    """Constructor-level checks for DFCollection, run once per data_type.

    None of these tests are about the DFCollection with any specific
    data_type... that will be handled in other parameterized tests

    """

    def test_init(self, mnt_filepath, df_coll_type):
        """Reject a missing data_type and accept the parametrized one."""
        # Omitting data_type must raise instead of silently picking a default.
        with pytest.raises(ValueError, match="Must explicitly provide a data_type"):
            DFCollection(archive_path=mnt_filepath.data_src)

        # NOTE(review): a check that data_type=DFCollectionType.TEST raises
        # "Must provide a supported data_type" used to live here but was
        # commented out -- confirm whether it should be restored.

        # Use the parametrized type so each generated case exercises a
        # different data_type instead of repeating the same WALL check.
        instance = DFCollection(
            data_type=df_coll_type, archive_path=mnt_filepath.data_src
        )
        assert instance.data_type == df_coll_type


@pytest.mark.parametrize("df_coll_type", df_collection_types)
class TestDFCollectionBaseProperties:
    """Property-level checks on DFCollection, run once per data_type."""

    @staticmethod
    def _century_collection(mnt_filepath, df_coll_type):
        """Build a collection from 1800-01-01 to 1900-01-01 (UTC), offset "100d"."""
        span_start = datetime(year=1800, month=1, day=1, tzinfo=timezone.utc)
        span_end = datetime(year=1900, month=1, day=1, tzinfo=timezone.utc)
        return DFCollection(
            data_type=df_coll_type,
            start=span_start,
            finished=span_end,
            offset="100d",
            archive_path=mnt_filepath.archive_path(enum_type=df_coll_type),
        )

    @pytest.mark.skip
    def test_df_collection_items(self, mnt_filepath, df_coll_type):
        """Expect one item per interval, 366 in total for the century span."""
        collection = self._century_collection(mnt_filepath, df_coll_type)

        assert len(collection.interval_range) == len(collection.items)
        assert len(collection.items) == 366

    def test_df_collection_progress(self, mnt_filepath, df_coll_type):
        """Expect `progress` to be a DataFrame of shape (366, 6)."""
        collection = self._century_collection(mnt_filepath, df_coll_type)

        # Progress returns a dataframe with a row each Item
        assert isinstance(collection.progress, pd.DataFrame)
        assert collection.progress.shape == (366, 6)

    def test_df_collection_schema(self, mnt_filepath, df_coll_type):
        """Expect WALL and SESSION collections to carry distinct pandera schemas."""
        wall_coll = DFCollection(
            data_type=DFCollectionType.WALL, archive_path=mnt_filepath.data_src
        )
        session_coll = DFCollection(
            data_type=DFCollectionType.SESSION, archive_path=mnt_filepath.data_src
        )

        assert wall_coll._schema != session_coll._schema
        assert isinstance(wall_coll._schema, DataFrameSchema)
        assert isinstance(session_coll._schema, DataFrameSchema)


class TestDFCollectionBaseMethods:
    """Method-level checks on DFCollection; every test here is marked skip."""

    @pytest.mark.skip
    def test_initial_load(self, mnt_filepath, thl_web_rr):
        """Run initial_load() on a five-minute USER window and check progress flags."""
        window_open = datetime(
            year=2022, month=1, day=1, minute=0, tzinfo=timezone.utc
        )
        window_close = datetime(
            year=2022, month=1, day=1, minute=5, tzinfo=timezone.utc
        )
        collection = DFCollection(
            pg_config=thl_web_rr,
            data_type=DFCollectionType.USER,
            start=window_open,
            finished=window_close,
            offset="2min",
            archive_path=mnt_filepath.data_src,
        )

        # Confirm that there are no archives available yet
        assert collection.progress.has_archive.eq(False).all()

        collection.initial_load()
        assert len(collection.ddf().index) == 47
        assert collection.progress.should_archive.eq(True).all()

        # A few archives should have been made
        assert not collection.progress.has_archive.eq(False).all()

    @pytest.mark.skip
    def test_fetch_force_rr_latest(self):
        """Placeholder with no assertions yet."""

    @pytest.mark.skip
    def test_force_rr_latest(self):
        """Placeholder with no assertions yet."""