diff options
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/grliq/managers/test_forensic_data.py | 50 | ||||
| -rw-r--r-- | tests/grliq/managers/test_forensic_results.py | 15 | ||||
| -rw-r--r-- | tests/grliq/models/test_forensic_data.py | 34 | ||||
| -rw-r--r-- | tests/grliq/test_utils.py | 5 | ||||
| -rw-r--r-- | tests/incite/collections/test_df_collection_base.py | 16 | ||||
| -rw-r--r-- | tests/incite/collections/test_df_collection_item_base.py | 18 | ||||
| -rw-r--r-- | tests/incite/collections/test_df_collection_item_thl_web.py | 241 | ||||
| -rw-r--r-- | tests/incite/collections/test_df_collection_thl_marketplaces.py | 9 | ||||
| -rw-r--r-- | tests/incite/collections/test_df_collection_thl_web.py | 62 | ||||
| -rw-r--r-- | tests/incite/mergers/foundations/test_enriched_session.py | 14 |
10 files changed, 273 insertions, 191 deletions
diff --git a/tests/grliq/managers/test_forensic_data.py b/tests/grliq/managers/test_forensic_data.py index ed4da80..ac2792a 100644 --- a/tests/grliq/managers/test_forensic_data.py +++ b/tests/grliq/managers/test_forensic_data.py @@ -1,14 +1,21 @@ from datetime import timedelta +from typing import TYPE_CHECKING from uuid import uuid4 import pytest -from generalresearch.grliq.models.events import TimingData, MouseEvent -from generalresearch.grliq.models.forensic_data import GrlIqData -from generalresearch.grliq.models.forensic_result import ( - GrlIqCheckerResults, - GrlIqForensicCategoryResult, -) +if TYPE_CHECKING: + from generalresearch.grliq.managers.forensic_data import ( + GrlIqDataManager, + GrlIqEventManager, + ) + from generalresearch.grliq.models.events import MouseEvent, TimingData + from generalresearch.grliq.models.forensic_data import GrlIqData + from generalresearch.grliq.models.forensic_result import ( + GrlIqCheckerResults, + GrlIqForensicCategoryResult, + ) + from generalresearch.models.thl.product import Product try: from psycopg.errors import UniqueViolation @@ -18,18 +25,16 @@ except ImportError: class TestGrlIqDataManager: - def test_create_dummy(self, grliq_dm): - from generalresearch.grliq.managers.forensic_data import GrlIqDataManager + def test_create_dummy(self, grliq_dm: "GrlIqDataManager"): from generalresearch.grliq.models.forensic_data import GrlIqData - grliq_dm: GrlIqDataManager gd1: GrlIqData = grliq_dm.create_dummy(is_attempt_allowed=True) assert isinstance(gd1, GrlIqData) assert isinstance(gd1.results, GrlIqCheckerResults) assert isinstance(gd1.category_result, GrlIqForensicCategoryResult) - def test_create(self, grliq_data, grliq_dm): + def test_create(self, grliq_data: "GrlIqData", grliq_dm: "GrlIqDataManager"): grliq_dm.create(grliq_data) assert grliq_data.id is not None @@ -45,20 +50,16 @@ class TestGrlIqDataManager: pass @pytest.mark.skip(reason="todo") - def test_update_fingerprint(self): - pass - - @pytest.mark.skip(reason="todo") def test_update_data(self): pass - def test_get_id(self, grliq_data, grliq_dm): + def test_get_id(self, grliq_data: "GrlIqData", grliq_dm: "GrlIqDataManager"): grliq_dm.create(grliq_data) res = grliq_dm.get_data(forensic_id=grliq_data.id) assert res == grliq_data - def test_get_uuid(self, grliq_data, grliq_dm): + def test_get_uuid(self, grliq_data: "GrlIqData", grliq_dm: "GrlIqDataManager"): grliq_dm.create(grliq_data) res = grliq_dm.get_data(forensic_uuid=grliq_data.uuid) @@ -72,7 +73,7 @@ class TestGrlIqDataManager: def test_get_unique_user_count_by_fingerprint(self): pass - def test_filter_data(self, grliq_data, grliq_dm): + def test_filter_data(self, grliq_data: "GrlIqData", grliq_dm: "GrlIqDataManager"): grliq_dm.create(grliq_data) res = grliq_dm.filter_data(uuids=[grliq_data.uuid])[0] assert res == grliq_data @@ -99,7 +100,7 @@ class TestGrlIqDataManager: def test_make_filter_str(self): pass - def test_filter_count(self, grliq_dm, product): + def test_filter_count(self, grliq_dm: "GrlIqDataManager", product: "Product"): res = grliq_dm.filter_count(product_id=product.uuid) assert isinstance(res, int) @@ -115,7 +116,7 @@ class TestGrlIqDataManager: class TestForensicDataGetAndFilter: - def test_events(self, grliq_dm, grliq_em): + def test_events(self, grliq_dm: "GrlIqDataManager"): """If load_events=True, the events and mouse_events attributes should be an array no matter what. An empty array means that the events were loaded, but there were no events available. @@ -140,7 +141,7 @@ class TestForensicDataGetAndFilter: assert len(instance.events) == 0 assert len(instance.mouse_events) == 0 - def test_timing(self, grliq_dm, grliq_em): + def test_timing(self, grliq_dm: "GrlIqDataManager", grliq_em: "GrlIqEventManager"): forensic_uuid = uuid4().hex grliq_dm.create_dummy(is_attempt_allowed=True, uuid=forensic_uuid) @@ -152,13 +153,16 @@ class TestForensicDataGetAndFilter: client_rtts=[100, 200, 150], server_rtts=[150, 120, 120] ), ) + instance = grliq_dm.get_data(forensic_uuid=forensic_uuid, load_events=True) assert isinstance(instance, GrlIqData) assert isinstance(instance.events, list) assert isinstance(instance.mouse_events, list) assert isinstance(instance.timing_data, TimingData) - def test_events_events(self, grliq_dm, grliq_em): + def test_events_events( + self, grliq_dm: "GrlIqDataManager", grliq_em: "GrlIqEventManager" + ): forensic_uuid = uuid4().hex grliq_dm.create_dummy(is_attempt_allowed=True, uuid=forensic_uuid) @@ -181,7 +185,9 @@ class TestForensicDataGetAndFilter: assert len(instance.pointer_move_events) == 0 assert len(instance.keyboard_events) == 0 - def test_events_click(self, grliq_dm, grliq_em): + def test_events_click( + self, grliq_dm: "GrlIqDataManager", grliq_em: "GrlIqEventManager" + ): forensic_uuid = uuid4().hex grliq_dm.create_dummy(is_attempt_allowed=True, uuid=forensic_uuid) instance = grliq_dm.get_data(forensic_uuid=forensic_uuid, load_events=True) diff --git a/tests/grliq/managers/test_forensic_results.py b/tests/grliq/managers/test_forensic_results.py index a837a64..68db732 100644 --- a/tests/grliq/managers/test_forensic_results.py +++ b/tests/grliq/managers/test_forensic_results.py @@ -1,9 +1,20 @@ +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from generalresearch.grliq.managers.forensic_data import GrlIqDataManager + from generalresearch.grliq.managers.forensic_results import ( + GrlIqCategoryResultsReader, + ) + + class TestGrlIqCategoryResultsReader: - def test_filter_category_results(self, grliq_dm, grliq_crr): + def test_filter_category_results( + self, grliq_dm: "GrlIqDataManager", grliq_crr: "GrlIqCategoryResultsReader" + ): from generalresearch.grliq.models.forensic_result import ( - Phase, GrlIqForensicCategoryResult, + Phase, ) # this is just testing that it doesn't fail diff --git a/tests/grliq/models/test_forensic_data.py b/tests/grliq/models/test_forensic_data.py index 653f9a9..4fbf962 100644 --- a/tests/grliq/models/test_forensic_data.py +++ b/tests/grliq/models/test_forensic_data.py @@ -1,42 +1,50 @@ +from typing import TYPE_CHECKING + import pytest from pydantic import ValidationError -from generalresearch.grliq.models.forensic_data import GrlIqData, Platform +if TYPE_CHECKING: + from generalresearch.grliq.models.forensic_data import GrlIqData class TestGrlIqData: - def test_supported_fonts(self, grliq_data): + def test_supported_fonts(self, grliq_data: "GrlIqData"): s = grliq_data.supported_fonts_binary assert len(s) == 1043 assert "Ubuntu" in grliq_data.supported_fonts - def test_battery(self, grliq_data): + def test_battery(self, grliq_data: "GrlIqData"): assert not grliq_data.battery_charging assert grliq_data.battery_level == 0.41 - def test_base(self, grliq_data): - g: GrlIqData = grliq_data - assert g.timezone == "America/Los_Angeles" - assert g.platform == Platform.LINUX_X86_64 - assert g.webgl_extensions + def test_base(self, grliq_data: "GrlIqData"): + from generalresearch.grliq.models.forensic_data import Platform + + assert grliq_data.timezone == "America/Los_Angeles" + assert grliq_data.platform == Platform.LINUX_X86_64 + assert grliq_data.webgl_extensions # ... more - assert g.results is None - assert g.category_result is None + assert grliq_data.results is None + assert grliq_data.category_result is None + + s = grliq_data.model_dump_json() + from generalresearch.grliq.models.forensic_data import GrlIqData, Platform - s = g.model_dump_json() g2: GrlIqData = GrlIqData.model_validate_json(s) assert g2.results is None assert g2.category_result is None - assert g == g2 + assert grliq_data == g2 # Testing things that will cause a validation error, should only be # because something is "corrupt", not b/c the user is a baddie - def test_corrupt(self, grliq_data): + def test_corrupt(self, grliq_data: "GrlIqData"): """Test for timestamp and timezone offset mismatch validation.""" + from generalresearch.grliq.models.forensic_data import GrlIqData + d = grliq_data.model_dump(mode="json") d.update( { diff --git a/tests/grliq/test_utils.py b/tests/grliq/test_utils.py index d9034d5..7f794e8 100644 --- a/tests/grliq/test_utils.py +++ b/tests/grliq/test_utils.py @@ -1,10 +1,13 @@ +from datetime import datetime from pathlib import Path from uuid import uuid4 class TestUtils: - def test_get_screenshot_fp(self, mnt_grliq_archive_dir, utc_hour_ago): + def test_get_screenshot_fp( + self, mnt_grliq_archive_dir: str, utc_hour_ago: datetime + ): from generalresearch.grliq.utils import get_screenshot_fp fp1 = get_screenshot_fp( diff --git a/tests/incite/collections/test_df_collection_base.py b/tests/incite/collections/test_df_collection_base.py index 5aaa729..10f025b 100644 --- a/tests/incite/collections/test_df_collection_base.py +++ b/tests/incite/collections/test_df_collection_base.py @@ -1,15 +1,19 @@ from datetime import datetime, timezone +from typing import TYPE_CHECKING import pandas as pd import pytest from pandera import DataFrameSchema from generalresearch.incite.collections import ( - DFCollectionType, DFCollection, + DFCollectionType, ) from test_utils.incite.conftest import mnt_filepath +if TYPE_CHECKING: + from generalresearch.incite.base import GRLDatasets + df_collection_types = [e for e in DFCollectionType if e is not DFCollectionType.TEST] @@ -20,7 +24,7 @@ class TestDFCollectionBase: """ - def test_init(self, mnt_filepath, df_coll_type): + def test_init(self, mnt_filepath: "GRLDatasets", df_coll_type: DFCollectionType): """Try to initialize the DFCollection with various invalid parameters""" with pytest.raises(expected_exception=ValueError) as cm: DFCollection(archive_path=mnt_filepath.data_src) @@ -42,7 +46,7 @@ class TestDFCollectionBase: class TestDFCollectionBaseProperties: @pytest.mark.skip - def test_df_collection_items(self, mnt_filepath, df_coll_type): + def test_df_collection_items(self, mnt_filepath: "GRLDatasets", df_coll_type): instance = DFCollection( data_type=df_coll_type, start=datetime(year=1800, month=1, day=1, tzinfo=timezone.utc), @@ -54,7 +58,7 @@ class TestDFCollectionBaseProperties: assert len(instance.interval_range) == len(instance.items) assert len(instance.items) == 366 - def test_df_collection_progress(self, mnt_filepath, df_coll_type): + def test_df_collection_progress(self, mnt_filepath: "GRLDatasets", df_coll_type): instance = DFCollection( data_type=df_coll_type, start=datetime(year=1800, month=1, day=1, tzinfo=timezone.utc), @@ -67,7 +71,7 @@ class TestDFCollectionBaseProperties: assert isinstance(instance.progress, pd.DataFrame) assert instance.progress.shape == (366, 6) - def test_df_collection_schema(self, mnt_filepath, df_coll_type): + def test_df_collection_schema(self, mnt_filepath: "GRLDatasets", df_coll_type): instance1 = DFCollection( data_type=DFCollectionType.WALL, archive_path=mnt_filepath.data_src ) @@ -84,7 +88,7 @@ class TestDFCollectionBaseProperties: class TestDFCollectionBaseMethods: @pytest.mark.skip - def test_initial_load(self, mnt_filepath, thl_web_rr): + def test_initial_load(self, mnt_filepath: "GRLDatasets", thl_web_rr): instance = DFCollection( pg_config=thl_web_rr, data_type=DFCollectionType.USER, diff --git a/tests/incite/collections/test_df_collection_item_base.py b/tests/incite/collections/test_df_collection_item_base.py index a0c0b0b..136d234 100644 --- a/tests/incite/collections/test_df_collection_item_base.py +++ b/tests/incite/collections/test_df_collection_item_base.py @@ -1,13 +1,17 @@ from datetime import datetime, timezone +from typing import TYPE_CHECKING import pytest from generalresearch.incite.collections import ( - DFCollectionType, - DFCollectionItem, DFCollection, + DFCollectionItem, + DFCollectionType, ) -from test_utils.incite.conftest import mnt_filepath +from generalresearch.pg_helper import PostgresConfig + +if TYPE_CHECKING: + from generalresearch.incite.base import GRLDatasets df_collection_types = [e for e in DFCollectionType if e is not DFCollectionType.TEST] @@ -15,7 +19,7 @@ df_collection_types = [e for e in DFCollectionType if e is not DFCollectionType. @pytest.mark.parametrize("df_coll_type", df_collection_types) class TestDFCollectionItemBase: - def test_init(self, mnt_filepath, df_coll_type): + def test_init(self, mnt_filepath: "GRLDatasets", df_coll_type): collection = DFCollection( data_type=df_coll_type, offset="100d", @@ -41,7 +45,7 @@ class TestDFCollectionItemProperties: @pytest.mark.parametrize("df_coll_type", df_collection_types) class TestDFCollectionItemMethods: - def test_has_mysql_false(self, mnt_filepath, df_coll_type): + def test_has_mysql_false(self, mnt_filepath: "GRLDatasets", df_coll_type): collection = DFCollection( data_type=df_coll_type, offset="100d", @@ -53,7 +57,9 @@ class TestDFCollectionItemMethods: instance1: DFCollectionItem = collection.items[0] assert not instance1.has_mysql() - def test_has_mysql_true(self, thl_web_rr, mnt_filepath, df_coll_type): + def test_has_mysql_true( + self, thl_web_rr: PostgresConfig, mnt_filepath: "GRLDatasets", df_coll_type + ): collection = DFCollection( data_type=df_coll_type, offset="100d", diff --git a/tests/incite/collections/test_df_collection_item_thl_web.py b/tests/incite/collections/test_df_collection_item_thl_web.py index 9c3d67a..29f3677 100644 --- a/tests/incite/collections/test_df_collection_item_thl_web.py +++ b/tests/incite/collections/test_df_collection_item_thl_web.py @@ -1,7 +1,8 @@ -from datetime import datetime, timezone, timedelta +from datetime import datetime, timedelta, timezone from itertools import product as iter_product from os.path import join as pjoin -from pathlib import PurePath, Path +from pathlib import Path, PurePath +from typing import TYPE_CHECKING, Callable from uuid import uuid4 import dask.dataframe as dd @@ -11,13 +12,13 @@ from distributed import Client, Scheduler, Worker # noinspection PyUnresolvedReferences from distributed.utils_test import ( - gen_cluster, + cleanup, + client, client_no_amm, + cluster_fixture, + gen_cluster, loop, loop_in_thread, - cleanup, - cluster_fixture, - client, ) from faker import Faker from pandera import DataFrameSchema @@ -29,10 +30,14 @@ from generalresearch.incite.collections import ( DFCollectionType, ) from generalresearch.incite.schemas import ARCHIVE_AFTER +from generalresearch.models.thl.product import Product from generalresearch.models.thl.user import User from generalresearch.pg_helper import PostgresConfig from generalresearch.sql_helper import PostgresDsn -from test_utils.incite.conftest import mnt_filepath, incite_item_factory +from test_utils.incite.conftest import incite_item_factory, mnt_filepath + +if TYPE_CHECKING: + from generalresearch.incite.base import GRLDatasets fake = Faker() @@ -71,7 +76,7 @@ class TestDFCollectionItemBase: ) class TestDFCollectionItemProperties: - def test_filename(self, df_collection_data_type, df_collection, offset): + def test_filename(self, df_collection_data_type, df_collection, offset: str): for i in df_collection.items: assert isinstance(i.filename, str) @@ -88,35 +93,37 @@ class TestDFCollectionItemProperties: ) class TestDFCollectionItemPropertiesBase: - def test_name(self, df_collection_data_type, offset, df_collection): + def test_name(self, df_collection_data_type, offset: str, df_collection): for i in df_collection.items: assert isinstance(i.name, str) - def test_finish(self, df_collection_data_type, offset, df_collection): + def test_finish(self, df_collection_data_type, offset: str, df_collection): for i in df_collection.items: assert isinstance(i.finish, datetime) - def test_interval(self, df_collection_data_type, offset, df_collection): + def test_interval(self, df_collection_data_type, offset: str, df_collection): for i in df_collection.items: assert isinstance(i.interval, pd.Interval) - def test_partial_filename(self, df_collection_data_type, offset, df_collection): + def test_partial_filename( + self, df_collection_data_type, offset: str, df_collection + ): for i in df_collection.items: assert isinstance(i.partial_filename, str) - def test_empty_filename(self, df_collection_data_type, offset, df_collection): + def test_empty_filename(self, df_collection_data_type, offset: str, df_collection): for i in df_collection.items: assert isinstance(i.empty_filename, str) - def test_path(self, df_collection_data_type, offset, df_collection): + def test_path(self, df_collection_data_type, offset: str, df_collection): for i in df_collection.items: assert isinstance(i.path, FilePath) - def test_partial_path(self, df_collection_data_type, offset, df_collection): + def test_partial_path(self, df_collection_data_type, offset: str, df_collection): for i in df_collection.items: assert isinstance(i.partial_path, FilePath) - def test_empty_path(self, df_collection_data_type, offset, df_collection): + def test_empty_path(self, df_collection_data_type, offset: str, df_collection): for i in df_collection.items: assert isinstance(i.empty_path, FilePath) @@ -136,9 +143,9 @@ class TestDFCollectionItemMethod: def test_has_mysql( self, df_collection, - thl_web_rr, - offset, - duration, + thl_web_rr: PostgresConfig, + offset: str, + duration: timedelta, df_collection_data_type, delete_df_collection, ): @@ -166,9 +173,9 @@ class TestDFCollectionItemMethod: def test_update_partial_archive( self, df_collection, - offset, - duration, - thl_web_rw, + offset: str, + duration: timedelta, + thl_web_rw: PostgresConfig, df_collection_data_type, delete_df_collection, ): @@ -181,26 +188,26 @@ class TestDFCollectionItemMethod: def test_create_partial_archive( self, df_collection, - offset, - duration, + offset: str, + duration: str, create_main_accounts, - thl_web_rw, + thl_web_rw: PostgresConfig, thl_lm, df_collection_data_type, - user_factory, - product, + user_factory: Callable[..., User], + product: Product, client_no_amm, incite_item_factory, delete_df_collection, - mnt_filepath, + mnt_filepath: "GRLDatasets", ): assert 1 + 1 == 2 def test_dict( self, df_collection_data_type, - offset, - duration, + offset: str, + duration: timedelta, df_collection, delete_df_collection, ): @@ -224,12 +231,12 @@ class TestDFCollectionItemMethod: self, df_collection_data_type, df_collection, - offset, - duration, + offset: str, + duration: timedelta, create_main_accounts, - thl_web_rw, - user_factory, - product, + thl_web_rw: PostgresConfig, + user_factory: Callable[..., User], + product: Product, incite_item_factory, delete_df_collection, ): @@ -270,10 +277,10 @@ class TestDFCollectionItemMethod: self, df_collection_data_type, df_collection, - offset, - duration, - user_factory, - product, + offset: str, + duration: timedelta, + user_factory: Callable[..., User], + product: Product, incite_item_factory, delete_df_collection, ): @@ -316,15 +323,15 @@ class TestDFCollectionItemMethod: def test_from_mysql_ledger( self, df_collection, - user, + user: User, create_main_accounts, - offset, - duration, - thl_web_rw, + offset: str, + duration: timedelta, + thl_web_rw: PostgresConfig, thl_lm, df_collection_data_type, - user_factory, - product, + user_factory: Callable[..., User], + product: Product, client_no_amm, incite_item_factory, delete_df_collection, @@ -371,12 +378,12 @@ class TestDFCollectionItemMethod: def test_to_archive( self, df_collection, - user, - offset, - duration, + user: User, + offset: str, + duration: timedelta, df_collection_data_type, - user_factory, - product, + user_factory: Callable[..., User], + product: Product, client_no_amm, incite_item_factory, delete_df_collection, @@ -410,12 +417,12 @@ class TestDFCollectionItemMethod: self, df_collection_data_type, df_collection, - user_factory, - product, - offset, - duration, + user_factory: Callable[..., User], + product: Product, + offset: str, + duration: timedelta, client_no_amm, - user, + user: User, incite_item_factory, delete_df_collection, mnt_filepath, @@ -481,19 +488,19 @@ class TestDFCollectionItemMethod: @pytest.mark.skip def test_to_archive_numbered_partial( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): pass @pytest.mark.skip def test_initial_load( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): pass @pytest.mark.skip def test_clear_corrupt_archive( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): pass @@ -505,28 +512,36 @@ class TestDFCollectionItemMethod: class TestDFCollectionItemMethodBase: @pytest.mark.skip - def test_path_exists(self, df_collection_data_type, offset, duration): + def test_path_exists( + self, df_collection_data_type, offset: str, duration: timedelta + ): pass @pytest.mark.skip - def test_next_numbered_path(self, df_collection_data_type, offset, duration): + def test_next_numbered_path( + self, df_collection_data_type, offset: str, duration: timedelta + ): pass @pytest.mark.skip def test_search_highest_numbered_path( - self, df_collection_data_type, offset, duration + self, df_collection_data_type, offset: str, duration: timedelta ): pass @pytest.mark.skip - def test_tmp_filename(self, df_collection_data_type, offset, duration): + def test_tmp_filename( + self, df_collection_data_type, offset: str, duration: timedelta + ): pass @pytest.mark.skip - def test_tmp_path(self, df_collection_data_type, offset, duration): + def test_tmp_path(self, df_collection_data_type, offset: str, duration: timedelta): pass - def test_is_empty(self, df_collection_data_type, df_collection, offset, duration): + def test_is_empty( + self, df_collection_data_type, df_collection, offset: str, duration: timedelta + ): """ test_has_empty was merged into this because item.has_empty is an alias for is_empty.. or vis-versa @@ -542,7 +557,7 @@ class TestDFCollectionItemMethodBase: assert item.has_empty() def test_has_partial_archive( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): for item in df_collection.items: assert not item.has_partial_archive() @@ -550,7 +565,7 @@ class TestDFCollectionItemMethodBase: assert item.has_partial_archive() def test_has_archive( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): for item in df_collection.items: # (1) Originally, nothing exists... so let's just make a file and @@ -587,7 +602,7 @@ class TestDFCollectionItemMethodBase: assert item.has_archive(include_empty=True) def test_delete_archive( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): for item in df_collection.items: item: DFCollectionItem @@ -610,7 +625,7 @@ class TestDFCollectionItemMethodBase: assert not item.partial_path.exists() def test_should_archive( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): schema: DataFrameSchema = df_collection._schema aa = schema.metadata[ARCHIVE_AFTER] @@ -627,11 +642,13 @@ class TestDFCollectionItemMethodBase: assert not item.should_archive() @pytest.mark.skip - def test_set_empty(self, df_collection_data_type, df_collection, offset, duration): + def test_set_empty( + self, df_collection_data_type, df_collection, offset: str, duration: timedelta + ): pass def test_valid_archive( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): # Originally, nothing has been saved or anything.. so confirm it # always comes back as None @@ -655,17 +672,19 @@ class TestDFCollectionItemMethodBase: @pytest.mark.skip def test_validate_df( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): pass @pytest.mark.skip def test_from_archive( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): pass - def test__to_dict(self, df_collection_data_type, df_collection, offset, duration): + def test__to_dict( + self, df_collection_data_type, df_collection, offset: str, duration: timedelta + ): for item in df_collection.items: res = item._to_dict() @@ -683,19 +702,19 @@ class TestDFCollectionItemMethodBase: @pytest.mark.skip def test_delete_partial( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): pass @pytest.mark.skip def test_cleanup_partials( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): pass @pytest.mark.skip def test_delete_dangling_partials( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): pass @@ -715,7 +734,7 @@ async def test_client(client, s, worker): ) @gen_cluster(client=True, nthreads=[("127.0.0.1", 1)]) @pytest.mark.anyio -async def test_client_parametrize(c, s, w, df_collection_data_type, offset): +async def test_client_parametrize(c, s, w, df_collection_data_type, offset: str): """c,s,a are all required - the secondary Worker (b) is not required""" assert isinstance(c, Client), f"c is not Client, it's {type(c)}" @@ -740,16 +759,16 @@ class TestDFCollectionItemFunctionalTest: def test_to_archive_and_ddf( self, df_collection_data_type, - offset, - duration, + offset: str, + duration: timedelta, client_no_amm, df_collection, - user, - user_factory, - product, + user: User, + user_factory: Callable[..., User], + product: Product, incite_item_factory, delete_df_collection, - mnt_filepath, + mnt_filepath: "GRLDatasets", ): from generalresearch.models.thl.user import User @@ -790,16 +809,16 @@ class TestDFCollectionItemFunctionalTest: def test_filesize_estimate( self, df_collection, - user, - offset, - duration, + user: User, + offset: str, + duration: timedelta, client_no_amm, - user_factory, - product, + user_factory: Callable[..., User], + product: Product, df_collection_data_type, incite_item_factory, delete_df_collection, - mnt_filepath, + mnt_filepath: "GRLDatasets", ): """A functional test to write some Parquet files for the DFCollection and then confirm that the files get written @@ -809,9 +828,11 @@ class TestDFCollectionItemFunctionalTest: (1) Validating their passing the pandera schema (2) The file or dir has an expected size on disk """ + import os + import pyarrow.parquet as pq + from generalresearch.models.thl.user import User - import os if df_collection.data_type in unsupported_mock_types: return @@ -838,14 +859,14 @@ class TestDFCollectionItemFunctionalTest: self, client_no_amm, df_collection, - user_factory, - product, - offset, - duration, + user_factory: Callable[..., User], + product: Product, + offset: str, + duration: timedelta, df_collection_data_type, incite_item_factory, delete_df_collection, - mnt_filepath, + mnt_filepath: "GRLDatasets", ): from generalresearch.models.thl.user import User @@ -875,7 +896,9 @@ class TestDFCollectionItemFunctionalTest: assert item.has_archive(include_empty=True) @pytest.mark.skip - def test_get_items(self, df_collection, product, offset, duration): + def test_get_items( + self, df_collection, product: Product, offset: str, duration: timedelta + ): with pytest.warns(expected_warning=ResourceWarning) as cm: df_collection.get_items_last365() assert "DFCollectionItem has missing archives" in str( @@ -892,11 +915,11 @@ class TestDFCollectionItemFunctionalTest: df_collection, incite_item_factory, delete_df_collection, - user_factory, - product, - offset, - duration, - mnt_filepath, + user_factory: Callable[..., User], + product: Product, + offset: str, + duration: timedelta, + mnt_filepath: "GRLDatasets", ): """Don't allow creating an archive for data that will likely be overwritten or updated @@ -934,10 +957,10 @@ class TestDFCollectionItemFunctionalTest: df_collection, incite_item_factory, delete_df_collection, - user, - offset, - duration, - mnt_filepath, + user: User, + offset: str, + duration: timedelta, + mnt_filepath: "GRLDatasets", ): delete_df_collection(coll=df_collection) @@ -962,10 +985,10 @@ class TestDFCollectionItemFunctionalTest: df_collection, incite_item_factory, delete_df_collection, - user_factory, - product, - offset, - duration, + user_factory: Callable[..., User], + product: Product, + offset: str, + duration: timedelta, mnt_filepath, ): from generalresearch.models.thl.user import User diff --git a/tests/incite/collections/test_df_collection_thl_marketplaces.py b/tests/incite/collections/test_df_collection_thl_marketplaces.py index 0a77938..54c27f7 100644 --- a/tests/incite/collections/test_df_collection_thl_marketplaces.py +++ b/tests/incite/collections/test_df_collection_thl_marketplaces.py @@ -1,11 +1,11 @@ from datetime import datetime, timezone from itertools import product +from typing import TYPE_CHECKING import pytest -from pandera import Column, Index, DataFrameSchema +from pandera import Column, DataFrameSchema, Index -from generalresearch.incite.collections import DFCollection -from generalresearch.incite.collections import DFCollectionType +from generalresearch.incite.collections import DFCollection, DFCollectionType from generalresearch.incite.collections.thl_marketplaces import ( InnovateSurveyHistoryCollection, MorningSurveyTimeseriesCollection, @@ -14,6 +14,9 @@ from generalresearch.incite.collections.thl_marketplaces import ( ) from test_utils.incite.conftest import mnt_filepath +if TYPE_CHECKING: + from generalresearch.incite.base import GRLDatasets + def combo_object(): for x in product( diff --git a/tests/incite/collections/test_df_collection_thl_web.py b/tests/incite/collections/test_df_collection_thl_web.py index e6f464b..51ca128 100644 --- a/tests/incite/collections/test_df_collection_thl_web.py +++ b/tests/incite/collections/test_df_collection_thl_web.py @@ -1,5 +1,6 @@ from datetime import datetime from itertools import product +from typing import TYPE_CHECKING import dask.dataframe as dd import pandas as pd @@ -8,6 +9,9 @@ from pandera import DataFrameSchema from generalresearch.incite.collections import DFCollection, DFCollectionType +if TYPE_CHECKING: + from generalresearch.incite.base import GRLDatasets + def combo_object(): for x in product( @@ -29,7 +33,7 @@ def combo_object(): ) class TestDFCollection_thl_web: - def test_init(self, df_collection_data_type, offset, df_collection): + def test_init(self, df_collection_data_type, offset: str, df_collection): assert isinstance(df_collection_data_type, DFCollectionType) assert isinstance(df_collection, DFCollection) @@ -39,12 +43,12 @@ class TestDFCollection_thl_web: ) class TestDFCollection_thl_web_Properties: - def test_items(self, df_collection_data_type, offset, df_collection): + def test_items(self, df_collection_data_type, offset: str, df_collection): assert isinstance(df_collection.items, list) for i in df_collection.items: assert i._collection == df_collection - def test__schema(self, df_collection_data_type, offset, df_collection): + def test__schema(self, df_collection_data_type, offset: str, df_collection): assert isinstance(df_collection._schema, DataFrameSchema) @@ -54,16 +58,16 @@ class TestDFCollection_thl_web_Properties: class TestDFCollection_thl_web_BaseProperties: @pytest.mark.skip - def test__interval_range(self, df_collection_data_type, offset, df_collection): + def test__interval_range(self, df_collection_data_type, offset: str, df_collection): pass - def test_interval_start(self, df_collection_data_type, offset, df_collection): + def test_interval_start(self, df_collection_data_type, offset: str, df_collection): assert isinstance(df_collection.interval_start, datetime) - def test_interval_range(self, df_collection_data_type, offset, df_collection): + def test_interval_range(self, df_collection_data_type, offset: str, df_collection): assert isinstance(df_collection.interval_range, list) - def test_progress(self, df_collection_data_type, offset, df_collection): + def test_progress(self, df_collection_data_type, offset: str, df_collection): assert isinstance(df_collection.progress, pd.DataFrame) @@ -78,7 +82,7 @@ class TestDFCollection_thl_web_Methods: @pytest.mark.skip def test_fetch_force_rr_latest( - self, df_collection_data_type, df_collection, offset + self, df_collection_data_type, df_collection, offset: str ): pass @@ -92,55 +96,63 @@ class TestDFCollection_thl_web_Methods: ) class TestDFCollection_thl_web_BaseMethods: - def test_fetch_all_paths(self, df_collection_data_type, offset, df_collection): + def test_fetch_all_paths(self, df_collection_data_type, offset: str, df_collection): res = df_collection.fetch_all_paths( items=None, force_rr_latest=False, include_partial=False ) assert isinstance(res, list) @pytest.mark.skip - def test_ddf(self, df_collection_data_type, offset, df_collection): + def test_ddf(self, df_collection_data_type, offset: str, df_collection): res = df_collection.ddf() assert isinstance(res, dd.DataFrame) # -- cleanup -- @pytest.mark.skip - def test_schedule_cleanup(self, df_collection_data_type, offset, df_collection): + def test_schedule_cleanup( + self, df_collection_data_type, offset: str, df_collection + ): pass @pytest.mark.skip - def test_cleanup(self, df_collection_data_type, offset, df_collection): + def test_cleanup(self, df_collection_data_type, offset: str, df_collection): pass @pytest.mark.skip - def test_cleanup_partials(self, df_collection_data_type, offset, df_collection): + def test_cleanup_partials( + self, df_collection_data_type, offset: str, df_collection + ): pass @pytest.mark.skip - def test_clear_tmp_archives(self, df_collection_data_type, offset, df_collection): + def test_clear_tmp_archives( + self, df_collection_data_type, offset: str, df_collection + ): pass @pytest.mark.skip def test_clear_corrupt_archives( - self, df_collection_data_type, offset, df_collection + self, df_collection_data_type, offset: str, df_collection ): pass @pytest.mark.skip - def test_rebuild_symlinks(self, df_collection_data_type, offset, df_collection): + def test_rebuild_symlinks( + self, df_collection_data_type, offset: str, df_collection + ): pass # -- Source timing -- @pytest.mark.skip - def test_get_item(self, df_collection_data_type, offset, df_collection): + def test_get_item(self, df_collection_data_type, offset: str, df_collection): pass @pytest.mark.skip - def test_get_item_start(self, df_collection_data_type, offset, df_collection): + def test_get_item_start(self, df_collection_data_type, offset: str, df_collection): pass @pytest.mark.skip - def test_get_items(self, df_collection_data_type, offset, df_collection): + def test_get_items(self, df_collection_data_type, offset: str, df_collection): # If we get all the items from the start of the collection, it # should include all the items! res1 = df_collection.items @@ -148,13 +160,19 @@ class TestDFCollection_thl_web_BaseMethods: assert len(res1) == len(res2) @pytest.mark.skip - def test_get_items_from_year(self, df_collection_data_type, offset, df_collection): + def test_get_items_from_year( + self, df_collection_data_type, offset: str, df_collection + ): pass @pytest.mark.skip - def test_get_items_last90(self, df_collection_data_type, offset, df_collection): + def test_get_items_last90( + self, df_collection_data_type, offset: str, df_collection + ): pass @pytest.mark.skip - def test_get_items_last365(self, df_collection_data_type, offset, df_collection): + def test_get_items_last365( + self, df_collection_data_type, offset: str, df_collection + ): pass diff --git a/tests/incite/mergers/foundations/test_enriched_session.py b/tests/incite/mergers/foundations/test_enriched_session.py index ec15d38..47f243e 100644 --- a/tests/incite/mergers/foundations/test_enriched_session.py +++ b/tests/incite/mergers/foundations/test_enriched_session.py @@ -1,16 +1,16 @@ -from datetime import timedelta, timezone, datetime +from datetime import datetime, timedelta, timezone from decimal import Decimal from itertools import product from typing import Optional -from generalresearch.incite.schemas.admin_responses import ( - AdminPOPSessionSchema, -) - import dask.dataframe as dd import pandas as pd import pytest +from generalresearch.incite.schemas.admin_responses import ( + AdminPOPSessionSchema, +) +from generalresearch.pg_helper import PostgresConfig from test_utils.incite.collections.conftest import ( session_collection, wall_collection, @@ -36,7 +36,7 @@ class TestEnrichedSession: wall_collection, session_collection, enriched_session_merge, - thl_web_rr, + thl_web_rr: PostgresConfig, delete_df_collection, incite_item_factory, ): @@ -95,7 +95,7 @@ class TestEnrichedSessionAdmin: client_no_amm, wall_collection, session_collection, - thl_web_rr, + thl_web_rr: PostgresConfig, session_report_request, user_factory, start, |
