about | summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
Diffstat (limited to 'tests')
-rw-r--r-- tests/grliq/managers/test_forensic_data.py | 50
-rw-r--r-- tests/grliq/managers/test_forensic_results.py | 15
-rw-r--r-- tests/grliq/models/test_forensic_data.py | 34
-rw-r--r-- tests/grliq/test_utils.py | 5
-rw-r--r-- tests/incite/collections/test_df_collection_base.py | 16
-rw-r--r-- tests/incite/collections/test_df_collection_item_base.py | 18
-rw-r--r-- tests/incite/collections/test_df_collection_item_thl_web.py | 241
-rw-r--r-- tests/incite/collections/test_df_collection_thl_marketplaces.py | 9
-rw-r--r-- tests/incite/collections/test_df_collection_thl_web.py | 62
-rw-r--r-- tests/incite/mergers/foundations/test_enriched_session.py | 14
10 files changed, 273 insertions, 191 deletions
diff --git a/tests/grliq/managers/test_forensic_data.py b/tests/grliq/managers/test_forensic_data.py
index ed4da80..ac2792a 100644
--- a/tests/grliq/managers/test_forensic_data.py
+++ b/tests/grliq/managers/test_forensic_data.py
@@ -1,14 +1,21 @@
from datetime import timedelta
+from typing import TYPE_CHECKING
from uuid import uuid4
import pytest
-from generalresearch.grliq.models.events import TimingData, MouseEvent
-from generalresearch.grliq.models.forensic_data import GrlIqData
-from generalresearch.grliq.models.forensic_result import (
- GrlIqCheckerResults,
- GrlIqForensicCategoryResult,
-)
+if TYPE_CHECKING:
+ from generalresearch.grliq.managers.forensic_data import (
+ GrlIqDataManager,
+ GrlIqEventManager,
+ )
+ from generalresearch.grliq.models.events import MouseEvent, TimingData
+ from generalresearch.grliq.models.forensic_data import GrlIqData
+ from generalresearch.grliq.models.forensic_result import (
+ GrlIqCheckerResults,
+ GrlIqForensicCategoryResult,
+ )
+ from generalresearch.models.thl.product import Product
try:
from psycopg.errors import UniqueViolation
@@ -18,18 +25,16 @@ except ImportError:
class TestGrlIqDataManager:
- def test_create_dummy(self, grliq_dm):
- from generalresearch.grliq.managers.forensic_data import GrlIqDataManager
+ def test_create_dummy(self, grliq_dm: "GrlIqDataManager"):
from generalresearch.grliq.models.forensic_data import GrlIqData
- grliq_dm: GrlIqDataManager
gd1: GrlIqData = grliq_dm.create_dummy(is_attempt_allowed=True)
assert isinstance(gd1, GrlIqData)
assert isinstance(gd1.results, GrlIqCheckerResults)
assert isinstance(gd1.category_result, GrlIqForensicCategoryResult)
- def test_create(self, grliq_data, grliq_dm):
+ def test_create(self, grliq_data: "GrlIqData", grliq_dm: "GrlIqDataManager"):
grliq_dm.create(grliq_data)
assert grliq_data.id is not None
@@ -45,20 +50,16 @@ class TestGrlIqDataManager:
pass
@pytest.mark.skip(reason="todo")
- def test_update_fingerprint(self):
- pass
-
- @pytest.mark.skip(reason="todo")
def test_update_data(self):
pass
- def test_get_id(self, grliq_data, grliq_dm):
+ def test_get_id(self, grliq_data: "GrlIqData", grliq_dm: "GrlIqDataManager"):
grliq_dm.create(grliq_data)
res = grliq_dm.get_data(forensic_id=grliq_data.id)
assert res == grliq_data
- def test_get_uuid(self, grliq_data, grliq_dm):
+ def test_get_uuid(self, grliq_data: "GrlIqData", grliq_dm: "GrlIqDataManager"):
grliq_dm.create(grliq_data)
res = grliq_dm.get_data(forensic_uuid=grliq_data.uuid)
@@ -72,7 +73,7 @@ class TestGrlIqDataManager:
def test_get_unique_user_count_by_fingerprint(self):
pass
- def test_filter_data(self, grliq_data, grliq_dm):
+ def test_filter_data(self, grliq_data: "GrlIqData", grliq_dm: "GrlIqDataManager"):
grliq_dm.create(grliq_data)
res = grliq_dm.filter_data(uuids=[grliq_data.uuid])[0]
assert res == grliq_data
@@ -99,7 +100,7 @@ class TestGrlIqDataManager:
def test_make_filter_str(self):
pass
- def test_filter_count(self, grliq_dm, product):
+ def test_filter_count(self, grliq_dm: "GrlIqDataManager", product: "Product"):
res = grliq_dm.filter_count(product_id=product.uuid)
assert isinstance(res, int)
@@ -115,7 +116,7 @@ class TestGrlIqDataManager:
class TestForensicDataGetAndFilter:
- def test_events(self, grliq_dm, grliq_em):
+ def test_events(self, grliq_dm: "GrlIqDataManager"):
"""If load_events=True, the events and mouse_events attributes should
be an array no matter what. An empty array means that the events were
loaded, but there were no events available.
@@ -140,7 +141,7 @@ class TestForensicDataGetAndFilter:
assert len(instance.events) == 0
assert len(instance.mouse_events) == 0
- def test_timing(self, grliq_dm, grliq_em):
+ def test_timing(self, grliq_dm: "GrlIqDataManager", grliq_em: "GrlIqEventManager"):
forensic_uuid = uuid4().hex
grliq_dm.create_dummy(is_attempt_allowed=True, uuid=forensic_uuid)
@@ -152,13 +153,16 @@ class TestForensicDataGetAndFilter:
client_rtts=[100, 200, 150], server_rtts=[150, 120, 120]
),
)
+
instance = grliq_dm.get_data(forensic_uuid=forensic_uuid, load_events=True)
assert isinstance(instance, GrlIqData)
assert isinstance(instance.events, list)
assert isinstance(instance.mouse_events, list)
assert isinstance(instance.timing_data, TimingData)
- def test_events_events(self, grliq_dm, grliq_em):
+ def test_events_events(
+ self, grliq_dm: "GrlIqDataManager", grliq_em: "GrlIqEventManager"
+ ):
forensic_uuid = uuid4().hex
grliq_dm.create_dummy(is_attempt_allowed=True, uuid=forensic_uuid)
@@ -181,7 +185,9 @@ class TestForensicDataGetAndFilter:
assert len(instance.pointer_move_events) == 0
assert len(instance.keyboard_events) == 0
- def test_events_click(self, grliq_dm, grliq_em):
+ def test_events_click(
+ self, grliq_dm: "GrlIqDataManager", grliq_em: "GrlIqEventManager"
+ ):
forensic_uuid = uuid4().hex
grliq_dm.create_dummy(is_attempt_allowed=True, uuid=forensic_uuid)
instance = grliq_dm.get_data(forensic_uuid=forensic_uuid, load_events=True)
diff --git a/tests/grliq/managers/test_forensic_results.py b/tests/grliq/managers/test_forensic_results.py
index a837a64..68db732 100644
--- a/tests/grliq/managers/test_forensic_results.py
+++ b/tests/grliq/managers/test_forensic_results.py
@@ -1,9 +1,20 @@
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from generalresearch.grliq.managers.forensic_data import GrlIqDataManager
+ from generalresearch.grliq.managers.forensic_results import (
+ GrlIqCategoryResultsReader,
+ )
+
+
class TestGrlIqCategoryResultsReader:
- def test_filter_category_results(self, grliq_dm, grliq_crr):
+ def test_filter_category_results(
+ self, grliq_dm: "GrlIqDataManager", grliq_crr: "GrlIqCategoryResultsReader"
+ ):
from generalresearch.grliq.models.forensic_result import (
- Phase,
GrlIqForensicCategoryResult,
+ Phase,
)
# this is just testing that it doesn't fail
diff --git a/tests/grliq/models/test_forensic_data.py b/tests/grliq/models/test_forensic_data.py
index 653f9a9..4fbf962 100644
--- a/tests/grliq/models/test_forensic_data.py
+++ b/tests/grliq/models/test_forensic_data.py
@@ -1,42 +1,50 @@
+from typing import TYPE_CHECKING
+
import pytest
from pydantic import ValidationError
-from generalresearch.grliq.models.forensic_data import GrlIqData, Platform
+if TYPE_CHECKING:
+ from generalresearch.grliq.models.forensic_data import GrlIqData
class TestGrlIqData:
- def test_supported_fonts(self, grliq_data):
+ def test_supported_fonts(self, grliq_data: "GrlIqData"):
s = grliq_data.supported_fonts_binary
assert len(s) == 1043
assert "Ubuntu" in grliq_data.supported_fonts
- def test_battery(self, grliq_data):
+ def test_battery(self, grliq_data: "GrlIqData"):
assert not grliq_data.battery_charging
assert grliq_data.battery_level == 0.41
- def test_base(self, grliq_data):
- g: GrlIqData = grliq_data
- assert g.timezone == "America/Los_Angeles"
- assert g.platform == Platform.LINUX_X86_64
- assert g.webgl_extensions
+ def test_base(self, grliq_data: "GrlIqData"):
+ from generalresearch.grliq.models.forensic_data import Platform
+
+ assert grliq_data.timezone == "America/Los_Angeles"
+ assert grliq_data.platform == Platform.LINUX_X86_64
+ assert grliq_data.webgl_extensions
# ... more
- assert g.results is None
- assert g.category_result is None
+ assert grliq_data.results is None
+ assert grliq_data.category_result is None
+
+ s = grliq_data.model_dump_json()
+ from generalresearch.grliq.models.forensic_data import GrlIqData, Platform
- s = g.model_dump_json()
g2: GrlIqData = GrlIqData.model_validate_json(s)
assert g2.results is None
assert g2.category_result is None
- assert g == g2
+ assert grliq_data == g2
# Testing things that will cause a validation error, should only be
# because something is "corrupt", not b/c the user is a baddie
- def test_corrupt(self, grliq_data):
+ def test_corrupt(self, grliq_data: "GrlIqData"):
"""Test for timestamp and timezone offset mismatch validation."""
+ from generalresearch.grliq.models.forensic_data import GrlIqData
+
d = grliq_data.model_dump(mode="json")
d.update(
{
diff --git a/tests/grliq/test_utils.py b/tests/grliq/test_utils.py
index d9034d5..7f794e8 100644
--- a/tests/grliq/test_utils.py
+++ b/tests/grliq/test_utils.py
@@ -1,10 +1,13 @@
+from datetime import datetime
from pathlib import Path
from uuid import uuid4
class TestUtils:
- def test_get_screenshot_fp(self, mnt_grliq_archive_dir, utc_hour_ago):
+ def test_get_screenshot_fp(
+ self, mnt_grliq_archive_dir: str, utc_hour_ago: datetime
+ ):
from generalresearch.grliq.utils import get_screenshot_fp
fp1 = get_screenshot_fp(
diff --git a/tests/incite/collections/test_df_collection_base.py b/tests/incite/collections/test_df_collection_base.py
index 5aaa729..10f025b 100644
--- a/tests/incite/collections/test_df_collection_base.py
+++ b/tests/incite/collections/test_df_collection_base.py
@@ -1,15 +1,19 @@
from datetime import datetime, timezone
+from typing import TYPE_CHECKING
import pandas as pd
import pytest
from pandera import DataFrameSchema
from generalresearch.incite.collections import (
- DFCollectionType,
DFCollection,
+ DFCollectionType,
)
from test_utils.incite.conftest import mnt_filepath
+if TYPE_CHECKING:
+ from generalresearch.incite.base import GRLDatasets
+
df_collection_types = [e for e in DFCollectionType if e is not DFCollectionType.TEST]
@@ -20,7 +24,7 @@ class TestDFCollectionBase:
"""
- def test_init(self, mnt_filepath, df_coll_type):
+ def test_init(self, mnt_filepath: "GRLDatasets", df_coll_type: DFCollectionType):
"""Try to initialize the DFCollection with various invalid parameters"""
with pytest.raises(expected_exception=ValueError) as cm:
DFCollection(archive_path=mnt_filepath.data_src)
@@ -42,7 +46,7 @@ class TestDFCollectionBase:
class TestDFCollectionBaseProperties:
@pytest.mark.skip
- def test_df_collection_items(self, mnt_filepath, df_coll_type):
+ def test_df_collection_items(self, mnt_filepath: "GRLDatasets", df_coll_type):
instance = DFCollection(
data_type=df_coll_type,
start=datetime(year=1800, month=1, day=1, tzinfo=timezone.utc),
@@ -54,7 +58,7 @@ class TestDFCollectionBaseProperties:
assert len(instance.interval_range) == len(instance.items)
assert len(instance.items) == 366
- def test_df_collection_progress(self, mnt_filepath, df_coll_type):
+ def test_df_collection_progress(self, mnt_filepath: "GRLDatasets", df_coll_type):
instance = DFCollection(
data_type=df_coll_type,
start=datetime(year=1800, month=1, day=1, tzinfo=timezone.utc),
@@ -67,7 +71,7 @@ class TestDFCollectionBaseProperties:
assert isinstance(instance.progress, pd.DataFrame)
assert instance.progress.shape == (366, 6)
- def test_df_collection_schema(self, mnt_filepath, df_coll_type):
+ def test_df_collection_schema(self, mnt_filepath: "GRLDatasets", df_coll_type):
instance1 = DFCollection(
data_type=DFCollectionType.WALL, archive_path=mnt_filepath.data_src
)
@@ -84,7 +88,7 @@ class TestDFCollectionBaseProperties:
class TestDFCollectionBaseMethods:
@pytest.mark.skip
- def test_initial_load(self, mnt_filepath, thl_web_rr):
+ def test_initial_load(self, mnt_filepath: "GRLDatasets", thl_web_rr):
instance = DFCollection(
pg_config=thl_web_rr,
data_type=DFCollectionType.USER,
diff --git a/tests/incite/collections/test_df_collection_item_base.py b/tests/incite/collections/test_df_collection_item_base.py
index a0c0b0b..136d234 100644
--- a/tests/incite/collections/test_df_collection_item_base.py
+++ b/tests/incite/collections/test_df_collection_item_base.py
@@ -1,13 +1,17 @@
from datetime import datetime, timezone
+from typing import TYPE_CHECKING
import pytest
from generalresearch.incite.collections import (
- DFCollectionType,
- DFCollectionItem,
DFCollection,
+ DFCollectionItem,
+ DFCollectionType,
)
-from test_utils.incite.conftest import mnt_filepath
+from generalresearch.pg_helper import PostgresConfig
+
+if TYPE_CHECKING:
+ from generalresearch.incite.base import GRLDatasets
df_collection_types = [e for e in DFCollectionType if e is not DFCollectionType.TEST]
@@ -15,7 +19,7 @@ df_collection_types = [e for e in DFCollectionType if e is not DFCollectionType.
@pytest.mark.parametrize("df_coll_type", df_collection_types)
class TestDFCollectionItemBase:
- def test_init(self, mnt_filepath, df_coll_type):
+ def test_init(self, mnt_filepath: "GRLDatasets", df_coll_type):
collection = DFCollection(
data_type=df_coll_type,
offset="100d",
@@ -41,7 +45,7 @@ class TestDFCollectionItemProperties:
@pytest.mark.parametrize("df_coll_type", df_collection_types)
class TestDFCollectionItemMethods:
- def test_has_mysql_false(self, mnt_filepath, df_coll_type):
+ def test_has_mysql_false(self, mnt_filepath: "GRLDatasets", df_coll_type):
collection = DFCollection(
data_type=df_coll_type,
offset="100d",
@@ -53,7 +57,9 @@ class TestDFCollectionItemMethods:
instance1: DFCollectionItem = collection.items[0]
assert not instance1.has_mysql()
- def test_has_mysql_true(self, thl_web_rr, mnt_filepath, df_coll_type):
+ def test_has_mysql_true(
+ self, thl_web_rr: PostgresConfig, mnt_filepath: "GRLDatasets", df_coll_type
+ ):
collection = DFCollection(
data_type=df_coll_type,
offset="100d",
diff --git a/tests/incite/collections/test_df_collection_item_thl_web.py b/tests/incite/collections/test_df_collection_item_thl_web.py
index 9c3d67a..29f3677 100644
--- a/tests/incite/collections/test_df_collection_item_thl_web.py
+++ b/tests/incite/collections/test_df_collection_item_thl_web.py
@@ -1,7 +1,8 @@
-from datetime import datetime, timezone, timedelta
+from datetime import datetime, timedelta, timezone
from itertools import product as iter_product
from os.path import join as pjoin
-from pathlib import PurePath, Path
+from pathlib import Path, PurePath
+from typing import TYPE_CHECKING, Callable
from uuid import uuid4
import dask.dataframe as dd
@@ -11,13 +12,13 @@ from distributed import Client, Scheduler, Worker
# noinspection PyUnresolvedReferences
from distributed.utils_test import (
- gen_cluster,
+ cleanup,
+ client,
client_no_amm,
+ cluster_fixture,
+ gen_cluster,
loop,
loop_in_thread,
- cleanup,
- cluster_fixture,
- client,
)
from faker import Faker
from pandera import DataFrameSchema
@@ -29,10 +30,14 @@ from generalresearch.incite.collections import (
DFCollectionType,
)
from generalresearch.incite.schemas import ARCHIVE_AFTER
+from generalresearch.models.thl.product import Product
from generalresearch.models.thl.user import User
from generalresearch.pg_helper import PostgresConfig
from generalresearch.sql_helper import PostgresDsn
-from test_utils.incite.conftest import mnt_filepath, incite_item_factory
+from test_utils.incite.conftest import incite_item_factory, mnt_filepath
+
+if TYPE_CHECKING:
+ from generalresearch.incite.base import GRLDatasets
fake = Faker()
@@ -71,7 +76,7 @@ class TestDFCollectionItemBase:
)
class TestDFCollectionItemProperties:
- def test_filename(self, df_collection_data_type, df_collection, offset):
+ def test_filename(self, df_collection_data_type, df_collection, offset: str):
for i in df_collection.items:
assert isinstance(i.filename, str)
@@ -88,35 +93,37 @@ class TestDFCollectionItemProperties:
)
class TestDFCollectionItemPropertiesBase:
- def test_name(self, df_collection_data_type, offset, df_collection):
+ def test_name(self, df_collection_data_type, offset: str, df_collection):
for i in df_collection.items:
assert isinstance(i.name, str)
- def test_finish(self, df_collection_data_type, offset, df_collection):
+ def test_finish(self, df_collection_data_type, offset: str, df_collection):
for i in df_collection.items:
assert isinstance(i.finish, datetime)
- def test_interval(self, df_collection_data_type, offset, df_collection):
+ def test_interval(self, df_collection_data_type, offset: str, df_collection):
for i in df_collection.items:
assert isinstance(i.interval, pd.Interval)
- def test_partial_filename(self, df_collection_data_type, offset, df_collection):
+ def test_partial_filename(
+ self, df_collection_data_type, offset: str, df_collection
+ ):
for i in df_collection.items:
assert isinstance(i.partial_filename, str)
- def test_empty_filename(self, df_collection_data_type, offset, df_collection):
+ def test_empty_filename(self, df_collection_data_type, offset: str, df_collection):
for i in df_collection.items:
assert isinstance(i.empty_filename, str)
- def test_path(self, df_collection_data_type, offset, df_collection):
+ def test_path(self, df_collection_data_type, offset: str, df_collection):
for i in df_collection.items:
assert isinstance(i.path, FilePath)
- def test_partial_path(self, df_collection_data_type, offset, df_collection):
+ def test_partial_path(self, df_collection_data_type, offset: str, df_collection):
for i in df_collection.items:
assert isinstance(i.partial_path, FilePath)
- def test_empty_path(self, df_collection_data_type, offset, df_collection):
+ def test_empty_path(self, df_collection_data_type, offset: str, df_collection):
for i in df_collection.items:
assert isinstance(i.empty_path, FilePath)
@@ -136,9 +143,9 @@ class TestDFCollectionItemMethod:
def test_has_mysql(
self,
df_collection,
- thl_web_rr,
- offset,
- duration,
+ thl_web_rr: PostgresConfig,
+ offset: str,
+ duration: timedelta,
df_collection_data_type,
delete_df_collection,
):
@@ -166,9 +173,9 @@ class TestDFCollectionItemMethod:
def test_update_partial_archive(
self,
df_collection,
- offset,
- duration,
- thl_web_rw,
+ offset: str,
+ duration: timedelta,
+ thl_web_rw: PostgresConfig,
df_collection_data_type,
delete_df_collection,
):
@@ -181,26 +188,26 @@ class TestDFCollectionItemMethod:
def test_create_partial_archive(
self,
df_collection,
- offset,
- duration,
+ offset: str,
+ duration: str,
create_main_accounts,
- thl_web_rw,
+ thl_web_rw: PostgresConfig,
thl_lm,
df_collection_data_type,
- user_factory,
- product,
+ user_factory: Callable[..., User],
+ product: Product,
client_no_amm,
incite_item_factory,
delete_df_collection,
- mnt_filepath,
+ mnt_filepath: "GRLDatasets",
):
assert 1 + 1 == 2
def test_dict(
self,
df_collection_data_type,
- offset,
- duration,
+ offset: str,
+ duration: timedelta,
df_collection,
delete_df_collection,
):
@@ -224,12 +231,12 @@ class TestDFCollectionItemMethod:
self,
df_collection_data_type,
df_collection,
- offset,
- duration,
+ offset: str,
+ duration: timedelta,
create_main_accounts,
- thl_web_rw,
- user_factory,
- product,
+ thl_web_rw: PostgresConfig,
+ user_factory: Callable[..., User],
+ product: Product,
incite_item_factory,
delete_df_collection,
):
@@ -270,10 +277,10 @@ class TestDFCollectionItemMethod:
self,
df_collection_data_type,
df_collection,
- offset,
- duration,
- user_factory,
- product,
+ offset: str,
+ duration: timedelta,
+ user_factory: Callable[..., User],
+ product: Product,
incite_item_factory,
delete_df_collection,
):
@@ -316,15 +323,15 @@ class TestDFCollectionItemMethod:
def test_from_mysql_ledger(
self,
df_collection,
- user,
+ user: User,
create_main_accounts,
- offset,
- duration,
- thl_web_rw,
+ offset: str,
+ duration: timedelta,
+ thl_web_rw: PostgresConfig,
thl_lm,
df_collection_data_type,
- user_factory,
- product,
+ user_factory: Callable[..., User],
+ product: Product,
client_no_amm,
incite_item_factory,
delete_df_collection,
@@ -371,12 +378,12 @@ class TestDFCollectionItemMethod:
def test_to_archive(
self,
df_collection,
- user,
- offset,
- duration,
+ user: User,
+ offset: str,
+ duration: timedelta,
df_collection_data_type,
- user_factory,
- product,
+ user_factory: Callable[..., User],
+ product: Product,
client_no_amm,
incite_item_factory,
delete_df_collection,
@@ -410,12 +417,12 @@ class TestDFCollectionItemMethod:
self,
df_collection_data_type,
df_collection,
- user_factory,
- product,
- offset,
- duration,
+ user_factory: Callable[..., User],
+ product: Product,
+ offset: str,
+ duration: timedelta,
client_no_amm,
- user,
+ user: User,
incite_item_factory,
delete_df_collection,
mnt_filepath,
@@ -481,19 +488,19 @@ class TestDFCollectionItemMethod:
@pytest.mark.skip
def test_to_archive_numbered_partial(
- self, df_collection_data_type, df_collection, offset, duration
+ self, df_collection_data_type, df_collection, offset: str, duration: timedelta
):
pass
@pytest.mark.skip
def test_initial_load(
- self, df_collection_data_type, df_collection, offset, duration
+ self, df_collection_data_type, df_collection, offset: str, duration: timedelta
):
pass
@pytest.mark.skip
def test_clear_corrupt_archive(
- self, df_collection_data_type, df_collection, offset, duration
+ self, df_collection_data_type, df_collection, offset: str, duration: timedelta
):
pass
@@ -505,28 +512,36 @@ class TestDFCollectionItemMethod:
class TestDFCollectionItemMethodBase:
@pytest.mark.skip
- def test_path_exists(self, df_collection_data_type, offset, duration):
+ def test_path_exists(
+ self, df_collection_data_type, offset: str, duration: timedelta
+ ):
pass
@pytest.mark.skip
- def test_next_numbered_path(self, df_collection_data_type, offset, duration):
+ def test_next_numbered_path(
+ self, df_collection_data_type, offset: str, duration: timedelta
+ ):
pass
@pytest.mark.skip
def test_search_highest_numbered_path(
- self, df_collection_data_type, offset, duration
+ self, df_collection_data_type, offset: str, duration: timedelta
):
pass
@pytest.mark.skip
- def test_tmp_filename(self, df_collection_data_type, offset, duration):
+ def test_tmp_filename(
+ self, df_collection_data_type, offset: str, duration: timedelta
+ ):
pass
@pytest.mark.skip
- def test_tmp_path(self, df_collection_data_type, offset, duration):
+ def test_tmp_path(self, df_collection_data_type, offset: str, duration: timedelta):
pass
- def test_is_empty(self, df_collection_data_type, df_collection, offset, duration):
+ def test_is_empty(
+ self, df_collection_data_type, df_collection, offset: str, duration: timedelta
+ ):
"""
test_has_empty was merged into this because item.has_empty is
an alias for is_empty.. or vis-versa
@@ -542,7 +557,7 @@ class TestDFCollectionItemMethodBase:
assert item.has_empty()
def test_has_partial_archive(
- self, df_collection_data_type, df_collection, offset, duration
+ self, df_collection_data_type, df_collection, offset: str, duration: timedelta
):
for item in df_collection.items:
assert not item.has_partial_archive()
@@ -550,7 +565,7 @@ class TestDFCollectionItemMethodBase:
assert item.has_partial_archive()
def test_has_archive(
- self, df_collection_data_type, df_collection, offset, duration
+ self, df_collection_data_type, df_collection, offset: str, duration: timedelta
):
for item in df_collection.items:
# (1) Originally, nothing exists... so let's just make a file and
@@ -587,7 +602,7 @@ class TestDFCollectionItemMethodBase:
assert item.has_archive(include_empty=True)
def test_delete_archive(
- self, df_collection_data_type, df_collection, offset, duration
+ self, df_collection_data_type, df_collection, offset: str, duration: timedelta
):
for item in df_collection.items:
item: DFCollectionItem
@@ -610,7 +625,7 @@ class TestDFCollectionItemMethodBase:
assert not item.partial_path.exists()
def test_should_archive(
- self, df_collection_data_type, df_collection, offset, duration
+ self, df_collection_data_type, df_collection, offset: str, duration: timedelta
):
schema: DataFrameSchema = df_collection._schema
aa = schema.metadata[ARCHIVE_AFTER]
@@ -627,11 +642,13 @@ class TestDFCollectionItemMethodBase:
assert not item.should_archive()
@pytest.mark.skip
- def test_set_empty(self, df_collection_data_type, df_collection, offset, duration):
+ def test_set_empty(
+ self, df_collection_data_type, df_collection, offset: str, duration: timedelta
+ ):
pass
def test_valid_archive(
- self, df_collection_data_type, df_collection, offset, duration
+ self, df_collection_data_type, df_collection, offset: str, duration: timedelta
):
# Originally, nothing has been saved or anything.. so confirm it
# always comes back as None
@@ -655,17 +672,19 @@ class TestDFCollectionItemMethodBase:
@pytest.mark.skip
def test_validate_df(
- self, df_collection_data_type, df_collection, offset, duration
+ self, df_collection_data_type, df_collection, offset: str, duration: timedelta
):
pass
@pytest.mark.skip
def test_from_archive(
- self, df_collection_data_type, df_collection, offset, duration
+ self, df_collection_data_type, df_collection, offset: str, duration: timedelta
):
pass
- def test__to_dict(self, df_collection_data_type, df_collection, offset, duration):
+ def test__to_dict(
+ self, df_collection_data_type, df_collection, offset: str, duration: timedelta
+ ):
for item in df_collection.items:
res = item._to_dict()
@@ -683,19 +702,19 @@ class TestDFCollectionItemMethodBase:
@pytest.mark.skip
def test_delete_partial(
- self, df_collection_data_type, df_collection, offset, duration
+ self, df_collection_data_type, df_collection, offset: str, duration: timedelta
):
pass
@pytest.mark.skip
def test_cleanup_partials(
- self, df_collection_data_type, df_collection, offset, duration
+ self, df_collection_data_type, df_collection, offset: str, duration: timedelta
):
pass
@pytest.mark.skip
def test_delete_dangling_partials(
- self, df_collection_data_type, df_collection, offset, duration
+ self, df_collection_data_type, df_collection, offset: str, duration: timedelta
):
pass
@@ -715,7 +734,7 @@ async def test_client(client, s, worker):
)
@gen_cluster(client=True, nthreads=[("127.0.0.1", 1)])
@pytest.mark.anyio
-async def test_client_parametrize(c, s, w, df_collection_data_type, offset):
+async def test_client_parametrize(c, s, w, df_collection_data_type, offset: str):
"""c,s,a are all required - the secondary Worker (b) is not required"""
assert isinstance(c, Client), f"c is not Client, it's {type(c)}"
@@ -740,16 +759,16 @@ class TestDFCollectionItemFunctionalTest:
def test_to_archive_and_ddf(
self,
df_collection_data_type,
- offset,
- duration,
+ offset: str,
+ duration: timedelta,
client_no_amm,
df_collection,
- user,
- user_factory,
- product,
+ user: User,
+ user_factory: Callable[..., User],
+ product: Product,
incite_item_factory,
delete_df_collection,
- mnt_filepath,
+ mnt_filepath: "GRLDatasets",
):
from generalresearch.models.thl.user import User
@@ -790,16 +809,16 @@ class TestDFCollectionItemFunctionalTest:
def test_filesize_estimate(
self,
df_collection,
- user,
- offset,
- duration,
+ user: User,
+ offset: str,
+ duration: timedelta,
client_no_amm,
- user_factory,
- product,
+ user_factory: Callable[..., User],
+ product: Product,
df_collection_data_type,
incite_item_factory,
delete_df_collection,
- mnt_filepath,
+ mnt_filepath: "GRLDatasets",
):
"""A functional test to write some Parquet files for the
DFCollection and then confirm that the files get written
@@ -809,9 +828,11 @@ class TestDFCollectionItemFunctionalTest:
(1) Validating their passing the pandera schema
(2) The file or dir has an expected size on disk
"""
+ import os
+
import pyarrow.parquet as pq
+
from generalresearch.models.thl.user import User
- import os
if df_collection.data_type in unsupported_mock_types:
return
@@ -838,14 +859,14 @@ class TestDFCollectionItemFunctionalTest:
self,
client_no_amm,
df_collection,
- user_factory,
- product,
- offset,
- duration,
+ user_factory: Callable[..., User],
+ product: Product,
+ offset: str,
+ duration: timedelta,
df_collection_data_type,
incite_item_factory,
delete_df_collection,
- mnt_filepath,
+ mnt_filepath: "GRLDatasets",
):
from generalresearch.models.thl.user import User
@@ -875,7 +896,9 @@ class TestDFCollectionItemFunctionalTest:
assert item.has_archive(include_empty=True)
@pytest.mark.skip
- def test_get_items(self, df_collection, product, offset, duration):
+ def test_get_items(
+ self, df_collection, product: Product, offset: str, duration: timedelta
+ ):
with pytest.warns(expected_warning=ResourceWarning) as cm:
df_collection.get_items_last365()
assert "DFCollectionItem has missing archives" in str(
@@ -892,11 +915,11 @@ class TestDFCollectionItemFunctionalTest:
df_collection,
incite_item_factory,
delete_df_collection,
- user_factory,
- product,
- offset,
- duration,
- mnt_filepath,
+ user_factory: Callable[..., User],
+ product: Product,
+ offset: str,
+ duration: timedelta,
+ mnt_filepath: "GRLDatasets",
):
"""Don't allow creating an archive for data that will likely be
overwritten or updated
@@ -934,10 +957,10 @@ class TestDFCollectionItemFunctionalTest:
df_collection,
incite_item_factory,
delete_df_collection,
- user,
- offset,
- duration,
- mnt_filepath,
+ user: User,
+ offset: str,
+ duration: timedelta,
+ mnt_filepath: "GRLDatasets",
):
delete_df_collection(coll=df_collection)
@@ -962,10 +985,10 @@ class TestDFCollectionItemFunctionalTest:
df_collection,
incite_item_factory,
delete_df_collection,
- user_factory,
- product,
- offset,
- duration,
+ user_factory: Callable[..., User],
+ product: Product,
+ offset: str,
+ duration: timedelta,
mnt_filepath,
):
from generalresearch.models.thl.user import User
diff --git a/tests/incite/collections/test_df_collection_thl_marketplaces.py b/tests/incite/collections/test_df_collection_thl_marketplaces.py
index 0a77938..54c27f7 100644
--- a/tests/incite/collections/test_df_collection_thl_marketplaces.py
+++ b/tests/incite/collections/test_df_collection_thl_marketplaces.py
@@ -1,11 +1,11 @@
from datetime import datetime, timezone
from itertools import product
+from typing import TYPE_CHECKING
import pytest
-from pandera import Column, Index, DataFrameSchema
+from pandera import Column, DataFrameSchema, Index
-from generalresearch.incite.collections import DFCollection
-from generalresearch.incite.collections import DFCollectionType
+from generalresearch.incite.collections import DFCollection, DFCollectionType
from generalresearch.incite.collections.thl_marketplaces import (
InnovateSurveyHistoryCollection,
MorningSurveyTimeseriesCollection,
@@ -14,6 +14,9 @@ from generalresearch.incite.collections.thl_marketplaces import (
)
from test_utils.incite.conftest import mnt_filepath
+if TYPE_CHECKING:
+ from generalresearch.incite.base import GRLDatasets
+
def combo_object():
for x in product(
diff --git a/tests/incite/collections/test_df_collection_thl_web.py b/tests/incite/collections/test_df_collection_thl_web.py
index e6f464b..51ca128 100644
--- a/tests/incite/collections/test_df_collection_thl_web.py
+++ b/tests/incite/collections/test_df_collection_thl_web.py
@@ -1,5 +1,6 @@
from datetime import datetime
from itertools import product
+from typing import TYPE_CHECKING
import dask.dataframe as dd
import pandas as pd
@@ -8,6 +9,9 @@ from pandera import DataFrameSchema
from generalresearch.incite.collections import DFCollection, DFCollectionType
+if TYPE_CHECKING:
+ from generalresearch.incite.base import GRLDatasets
+
def combo_object():
for x in product(
@@ -29,7 +33,7 @@ def combo_object():
)
class TestDFCollection_thl_web:
- def test_init(self, df_collection_data_type, offset, df_collection):
+ def test_init(self, df_collection_data_type, offset: str, df_collection):
assert isinstance(df_collection_data_type, DFCollectionType)
assert isinstance(df_collection, DFCollection)
@@ -39,12 +43,12 @@ class TestDFCollection_thl_web:
)
class TestDFCollection_thl_web_Properties:
- def test_items(self, df_collection_data_type, offset, df_collection):
+ def test_items(self, df_collection_data_type, offset: str, df_collection):
assert isinstance(df_collection.items, list)
for i in df_collection.items:
assert i._collection == df_collection
- def test__schema(self, df_collection_data_type, offset, df_collection):
+ def test__schema(self, df_collection_data_type, offset: str, df_collection):
assert isinstance(df_collection._schema, DataFrameSchema)
@@ -54,16 +58,16 @@ class TestDFCollection_thl_web_Properties:
class TestDFCollection_thl_web_BaseProperties:
@pytest.mark.skip
- def test__interval_range(self, df_collection_data_type, offset, df_collection):
+ def test__interval_range(self, df_collection_data_type, offset: str, df_collection):
pass
- def test_interval_start(self, df_collection_data_type, offset, df_collection):
+ def test_interval_start(self, df_collection_data_type, offset: str, df_collection):
assert isinstance(df_collection.interval_start, datetime)
- def test_interval_range(self, df_collection_data_type, offset, df_collection):
+ def test_interval_range(self, df_collection_data_type, offset: str, df_collection):
assert isinstance(df_collection.interval_range, list)
- def test_progress(self, df_collection_data_type, offset, df_collection):
+ def test_progress(self, df_collection_data_type, offset: str, df_collection):
assert isinstance(df_collection.progress, pd.DataFrame)
@@ -78,7 +82,7 @@ class TestDFCollection_thl_web_Methods:
@pytest.mark.skip
def test_fetch_force_rr_latest(
- self, df_collection_data_type, df_collection, offset
+ self, df_collection_data_type, df_collection, offset: str
):
pass
@@ -92,55 +96,63 @@ class TestDFCollection_thl_web_Methods:
)
class TestDFCollection_thl_web_BaseMethods:
- def test_fetch_all_paths(self, df_collection_data_type, offset, df_collection):
+ def test_fetch_all_paths(self, df_collection_data_type, offset: str, df_collection):
res = df_collection.fetch_all_paths(
items=None, force_rr_latest=False, include_partial=False
)
assert isinstance(res, list)
@pytest.mark.skip
- def test_ddf(self, df_collection_data_type, offset, df_collection):
+ def test_ddf(self, df_collection_data_type, offset: str, df_collection):
res = df_collection.ddf()
assert isinstance(res, dd.DataFrame)
# -- cleanup --
@pytest.mark.skip
- def test_schedule_cleanup(self, df_collection_data_type, offset, df_collection):
+ def test_schedule_cleanup(
+ self, df_collection_data_type, offset: str, df_collection
+ ):
pass
@pytest.mark.skip
- def test_cleanup(self, df_collection_data_type, offset, df_collection):
+ def test_cleanup(self, df_collection_data_type, offset: str, df_collection):
pass
@pytest.mark.skip
- def test_cleanup_partials(self, df_collection_data_type, offset, df_collection):
+ def test_cleanup_partials(
+ self, df_collection_data_type, offset: str, df_collection
+ ):
pass
@pytest.mark.skip
- def test_clear_tmp_archives(self, df_collection_data_type, offset, df_collection):
+ def test_clear_tmp_archives(
+ self, df_collection_data_type, offset: str, df_collection
+ ):
pass
@pytest.mark.skip
def test_clear_corrupt_archives(
- self, df_collection_data_type, offset, df_collection
+ self, df_collection_data_type, offset: str, df_collection
):
pass
@pytest.mark.skip
- def test_rebuild_symlinks(self, df_collection_data_type, offset, df_collection):
+ def test_rebuild_symlinks(
+ self, df_collection_data_type, offset: str, df_collection
+ ):
pass
# -- Source timing --
@pytest.mark.skip
- def test_get_item(self, df_collection_data_type, offset, df_collection):
+ def test_get_item(self, df_collection_data_type, offset: str, df_collection):
pass
@pytest.mark.skip
- def test_get_item_start(self, df_collection_data_type, offset, df_collection):
+ def test_get_item_start(self, df_collection_data_type, offset: str, df_collection):
pass
@pytest.mark.skip
- def test_get_items(self, df_collection_data_type, offset, df_collection):
+ def test_get_items(self, df_collection_data_type, offset: str, df_collection):
# If we get all the items from the start of the collection, it
# should include all the items!
res1 = df_collection.items
@@ -148,13 +160,19 @@ class TestDFCollection_thl_web_BaseMethods:
assert len(res1) == len(res2)
@pytest.mark.skip
- def test_get_items_from_year(self, df_collection_data_type, offset, df_collection):
+ def test_get_items_from_year(
+ self, df_collection_data_type, offset: str, df_collection
+ ):
pass
@pytest.mark.skip
- def test_get_items_last90(self, df_collection_data_type, offset, df_collection):
+ def test_get_items_last90(
+ self, df_collection_data_type, offset: str, df_collection
+ ):
pass
@pytest.mark.skip
- def test_get_items_last365(self, df_collection_data_type, offset, df_collection):
+ def test_get_items_last365(
+ self, df_collection_data_type, offset: str, df_collection
+ ):
pass
diff --git a/tests/incite/mergers/foundations/test_enriched_session.py b/tests/incite/mergers/foundations/test_enriched_session.py
index ec15d38..47f243e 100644
--- a/tests/incite/mergers/foundations/test_enriched_session.py
+++ b/tests/incite/mergers/foundations/test_enriched_session.py
@@ -1,16 +1,16 @@
-from datetime import timedelta, timezone, datetime
+from datetime import datetime, timedelta, timezone
from decimal import Decimal
from itertools import product
from typing import Optional
-from generalresearch.incite.schemas.admin_responses import (
- AdminPOPSessionSchema,
-)
-
import dask.dataframe as dd
import pandas as pd
import pytest
+from generalresearch.incite.schemas.admin_responses import (
+ AdminPOPSessionSchema,
+)
+from generalresearch.pg_helper import PostgresConfig
from test_utils.incite.collections.conftest import (
session_collection,
wall_collection,
@@ -36,7 +36,7 @@ class TestEnrichedSession:
wall_collection,
session_collection,
enriched_session_merge,
- thl_web_rr,
+ thl_web_rr: PostgresConfig,
delete_df_collection,
incite_item_factory,
):
@@ -95,7 +95,7 @@ class TestEnrichedSessionAdmin:
client_no_amm,
wall_collection,
session_collection,
- thl_web_rr,
+ thl_web_rr: PostgresConfig,
session_report_request,
user_factory,
start,