diff options
Diffstat (limited to 'tests/incite/collections/test_df_collection_item_thl_web.py')
| -rw-r--r-- | tests/incite/collections/test_df_collection_item_thl_web.py | 241 |
1 files changed, 132 insertions, 109 deletions
diff --git a/tests/incite/collections/test_df_collection_item_thl_web.py b/tests/incite/collections/test_df_collection_item_thl_web.py index 9c3d67a..29f3677 100644 --- a/tests/incite/collections/test_df_collection_item_thl_web.py +++ b/tests/incite/collections/test_df_collection_item_thl_web.py @@ -1,7 +1,8 @@ -from datetime import datetime, timezone, timedelta +from datetime import datetime, timedelta, timezone from itertools import product as iter_product from os.path import join as pjoin -from pathlib import PurePath, Path +from pathlib import Path, PurePath +from typing import TYPE_CHECKING, Callable from uuid import uuid4 import dask.dataframe as dd @@ -11,13 +12,13 @@ from distributed import Client, Scheduler, Worker # noinspection PyUnresolvedReferences from distributed.utils_test import ( - gen_cluster, + cleanup, + client, client_no_amm, + cluster_fixture, + gen_cluster, loop, loop_in_thread, - cleanup, - cluster_fixture, - client, ) from faker import Faker from pandera import DataFrameSchema @@ -29,10 +30,14 @@ from generalresearch.incite.collections import ( DFCollectionType, ) from generalresearch.incite.schemas import ARCHIVE_AFTER +from generalresearch.models.thl.product import Product from generalresearch.models.thl.user import User from generalresearch.pg_helper import PostgresConfig from generalresearch.sql_helper import PostgresDsn -from test_utils.incite.conftest import mnt_filepath, incite_item_factory +from test_utils.incite.conftest import incite_item_factory, mnt_filepath + +if TYPE_CHECKING: + from generalresearch.incite.base import GRLDatasets fake = Faker() @@ -71,7 +76,7 @@ class TestDFCollectionItemBase: ) class TestDFCollectionItemProperties: - def test_filename(self, df_collection_data_type, df_collection, offset): + def test_filename(self, df_collection_data_type, df_collection, offset: str): for i in df_collection.items: assert isinstance(i.filename, str) @@ -88,35 +93,37 @@ class TestDFCollectionItemProperties: ) class TestDFCollectionItemPropertiesBase: - def test_name(self, df_collection_data_type, offset, df_collection): + def test_name(self, df_collection_data_type, offset: str, df_collection): for i in df_collection.items: assert isinstance(i.name, str) - def test_finish(self, df_collection_data_type, offset, df_collection): + def test_finish(self, df_collection_data_type, offset: str, df_collection): for i in df_collection.items: assert isinstance(i.finish, datetime) - def test_interval(self, df_collection_data_type, offset, df_collection): + def test_interval(self, df_collection_data_type, offset: str, df_collection): for i in df_collection.items: assert isinstance(i.interval, pd.Interval) - def test_partial_filename(self, df_collection_data_type, offset, df_collection): + def test_partial_filename( + self, df_collection_data_type, offset: str, df_collection + ): for i in df_collection.items: assert isinstance(i.partial_filename, str) - def test_empty_filename(self, df_collection_data_type, offset, df_collection): + def test_empty_filename(self, df_collection_data_type, offset: str, df_collection): for i in df_collection.items: assert isinstance(i.empty_filename, str) - def test_path(self, df_collection_data_type, offset, df_collection): + def test_path(self, df_collection_data_type, offset: str, df_collection): for i in df_collection.items: assert isinstance(i.path, FilePath) - def test_partial_path(self, df_collection_data_type, offset, df_collection): + def test_partial_path(self, df_collection_data_type, offset: str, df_collection): for i in df_collection.items: assert isinstance(i.partial_path, FilePath) - def test_empty_path(self, df_collection_data_type, offset, df_collection): + def test_empty_path(self, df_collection_data_type, offset: str, df_collection): for i in df_collection.items: assert isinstance(i.empty_path, FilePath) @@ -136,9 +143,9 @@ class TestDFCollectionItemMethod: def test_has_mysql( self, df_collection, - thl_web_rr, - offset, - duration, + thl_web_rr: PostgresConfig, + offset: str, + duration: timedelta, df_collection_data_type, delete_df_collection, ): @@ -166,9 +173,9 @@ class TestDFCollectionItemMethod: def test_update_partial_archive( self, df_collection, - offset, - duration, - thl_web_rw, + offset: str, + duration: timedelta, + thl_web_rw: PostgresConfig, df_collection_data_type, delete_df_collection, ): @@ -181,26 +188,26 @@ class TestDFCollectionItemMethod: def test_create_partial_archive( self, df_collection, - offset, - duration, + offset: str, + duration: str, create_main_accounts, - thl_web_rw, + thl_web_rw: PostgresConfig, thl_lm, df_collection_data_type, - user_factory, - product, + user_factory: Callable[..., User], + product: Product, client_no_amm, incite_item_factory, delete_df_collection, - mnt_filepath, + mnt_filepath: "GRLDatasets", ): assert 1 + 1 == 2 def test_dict( self, df_collection_data_type, - offset, - duration, + offset: str, + duration: timedelta, df_collection, delete_df_collection, ): @@ -224,12 +231,12 @@ class TestDFCollectionItemMethod: self, df_collection_data_type, df_collection, - offset, - duration, + offset: str, + duration: timedelta, create_main_accounts, - thl_web_rw, - user_factory, - product, + thl_web_rw: PostgresConfig, + user_factory: Callable[..., User], + product: Product, incite_item_factory, delete_df_collection, ): @@ -270,10 +277,10 @@ class TestDFCollectionItemMethod: self, df_collection_data_type, df_collection, - offset, - duration, - user_factory, - product, + offset: str, + duration: timedelta, + user_factory: Callable[..., User], + product: Product, incite_item_factory, delete_df_collection, ): @@ -316,15 +323,15 @@ class TestDFCollectionItemMethod: def test_from_mysql_ledger( self, df_collection, - user, + user: User, create_main_accounts, - offset, - duration, - thl_web_rw, + offset: str, + duration: timedelta, + thl_web_rw: PostgresConfig, thl_lm, df_collection_data_type, - user_factory, - product, + user_factory: Callable[..., User], + product: Product, client_no_amm, incite_item_factory, delete_df_collection, @@ -371,12 +378,12 @@ class TestDFCollectionItemMethod: def test_to_archive( self, df_collection, - user, - offset, - duration, + user: User, + offset: str, + duration: timedelta, df_collection_data_type, - user_factory, - product, + user_factory: Callable[..., User], + product: Product, client_no_amm, incite_item_factory, delete_df_collection, @@ -410,12 +417,12 @@ class TestDFCollectionItemMethod: self, df_collection_data_type, df_collection, - user_factory, - product, - offset, - duration, + user_factory: Callable[..., User], + product: Product, + offset: str, + duration: timedelta, client_no_amm, - user, + user: User, incite_item_factory, delete_df_collection, mnt_filepath, @@ -481,19 +488,19 @@ class TestDFCollectionItemMethod: @pytest.mark.skip def test_to_archive_numbered_partial( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): pass @pytest.mark.skip def test_initial_load( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): pass @pytest.mark.skip def test_clear_corrupt_archive( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): pass @@ -505,28 +512,36 @@ class TestDFCollectionItemMethod: class TestDFCollectionItemMethodBase: @pytest.mark.skip - def test_path_exists(self, df_collection_data_type, offset, duration): + def test_path_exists( + self, df_collection_data_type, offset: str, duration: timedelta + ): pass @pytest.mark.skip - def test_next_numbered_path(self, df_collection_data_type, offset, duration): + def test_next_numbered_path( + self, df_collection_data_type, offset: str, duration: timedelta + ): pass @pytest.mark.skip def test_search_highest_numbered_path( - self, df_collection_data_type, offset, duration + self, df_collection_data_type, offset: str, duration: timedelta ): pass @pytest.mark.skip - def test_tmp_filename(self, df_collection_data_type, offset, duration): + def test_tmp_filename( + self, df_collection_data_type, offset: str, duration: timedelta + ): pass @pytest.mark.skip - def test_tmp_path(self, df_collection_data_type, offset, duration): + def test_tmp_path(self, df_collection_data_type, offset: str, duration: timedelta): pass - def test_is_empty(self, df_collection_data_type, df_collection, offset, duration): + def test_is_empty( + self, df_collection_data_type, df_collection, offset: str, duration: timedelta + ): """ test_has_empty was merged into this because item.has_empty is an alias for is_empty.. or vis-versa @@ -542,7 +557,7 @@ class TestDFCollectionItemMethodBase: assert item.has_empty() def test_has_partial_archive( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): for item in df_collection.items: assert not item.has_partial_archive() @@ -550,7 +565,7 @@ class TestDFCollectionItemMethodBase: assert item.has_partial_archive() def test_has_archive( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): for item in df_collection.items: # (1) Originally, nothing exists... so let's just make a file and @@ -587,7 +602,7 @@ class TestDFCollectionItemMethodBase: assert item.has_archive(include_empty=True) def test_delete_archive( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): for item in df_collection.items: item: DFCollectionItem @@ -610,7 +625,7 @@ class TestDFCollectionItemMethodBase: assert not item.partial_path.exists() def test_should_archive( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): schema: DataFrameSchema = df_collection._schema aa = schema.metadata[ARCHIVE_AFTER] @@ -627,11 +642,13 @@ class TestDFCollectionItemMethodBase: assert not item.should_archive() @pytest.mark.skip - def test_set_empty(self, df_collection_data_type, df_collection, offset, duration): + def test_set_empty( + self, df_collection_data_type, df_collection, offset: str, duration: timedelta + ): pass def test_valid_archive( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): # Originally, nothing has been saved or anything.. so confirm it # always comes back as None @@ -655,17 +672,19 @@ class TestDFCollectionItemMethodBase: @pytest.mark.skip def test_validate_df( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): pass @pytest.mark.skip def test_from_archive( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): pass - def test__to_dict(self, df_collection_data_type, df_collection, offset, duration): + def test__to_dict( + self, df_collection_data_type, df_collection, offset: str, duration: timedelta + ): for item in df_collection.items: res = item._to_dict() @@ -683,19 +702,19 @@ class TestDFCollectionItemMethodBase: @pytest.mark.skip def test_delete_partial( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): pass @pytest.mark.skip def test_cleanup_partials( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): pass @pytest.mark.skip def test_delete_dangling_partials( - self, df_collection_data_type, df_collection, offset, duration + self, df_collection_data_type, df_collection, offset: str, duration: timedelta ): pass @@ -715,7 +734,7 @@ async def test_client(client, s, worker): ) @gen_cluster(client=True, nthreads=[("127.0.0.1", 1)]) @pytest.mark.anyio -async def test_client_parametrize(c, s, w, df_collection_data_type, offset): +async def test_client_parametrize(c, s, w, df_collection_data_type, offset: str): """c,s,a are all required - the secondary Worker (b) is not required""" assert isinstance(c, Client), f"c is not Client, it's {type(c)}" @@ -740,16 +759,16 @@ class TestDFCollectionItemFunctionalTest: def test_to_archive_and_ddf( self, df_collection_data_type, - offset, - duration, + offset: str, + duration: timedelta, client_no_amm, df_collection, - user, - user_factory, - product, + user: User, + user_factory: Callable[..., User], + product: Product, incite_item_factory, delete_df_collection, - mnt_filepath, + mnt_filepath: "GRLDatasets", ): from generalresearch.models.thl.user import User @@ -790,16 +809,16 @@ class TestDFCollectionItemFunctionalTest: def test_filesize_estimate( self, df_collection, - user, - offset, - duration, + user: User, + offset: str, + duration: timedelta, client_no_amm, - user_factory, - product, + user_factory: Callable[..., User], + product: Product, df_collection_data_type, incite_item_factory, delete_df_collection, - mnt_filepath, + mnt_filepath: "GRLDatasets", ): """A functional test to write some Parquet files for the DFCollection and then confirm that the files get written @@ -809,9 +828,11 @@ class TestDFCollectionItemFunctionalTest: (1) Validating their passing the pandera schema (2) The file or dir has an expected size on disk """ + import os + import pyarrow.parquet as pq + from generalresearch.models.thl.user import User - import os if df_collection.data_type in unsupported_mock_types: return @@ -838,14 +859,14 @@ class TestDFCollectionItemFunctionalTest: self, client_no_amm, df_collection, - user_factory, - product, - offset, - duration, + user_factory: Callable[..., User], + product: Product, + offset: str, + duration: timedelta, df_collection_data_type, incite_item_factory, delete_df_collection, - mnt_filepath, + mnt_filepath: "GRLDatasets", ): from generalresearch.models.thl.user import User @@ -875,7 +896,9 @@ class TestDFCollectionItemFunctionalTest: assert item.has_archive(include_empty=True) @pytest.mark.skip - def test_get_items(self, df_collection, product, offset, duration): + def test_get_items( + self, df_collection, product: Product, offset: str, duration: timedelta + ): with pytest.warns(expected_warning=ResourceWarning) as cm: df_collection.get_items_last365() assert "DFCollectionItem has missing archives" in str( @@ -892,11 +915,11 @@ class TestDFCollectionItemFunctionalTest: df_collection, incite_item_factory, delete_df_collection, - user_factory, - product, - offset, - duration, - mnt_filepath, + user_factory: Callable[..., User], + product: Product, + offset: str, + duration: timedelta, + mnt_filepath: "GRLDatasets", ): """Don't allow creating an archive for data that will likely be overwritten or updated @@ -934,10 +957,10 @@ class TestDFCollectionItemFunctionalTest: df_collection, incite_item_factory, delete_df_collection, - user, - offset, - duration, - mnt_filepath, + user: User, + offset: str, + duration: timedelta, + mnt_filepath: "GRLDatasets", ): delete_df_collection(coll=df_collection) @@ -962,10 +985,10 @@ class TestDFCollectionItemFunctionalTest: df_collection, incite_item_factory, delete_df_collection, - user_factory, - product, - offset, - duration, + user_factory: Callable[..., User], + product: Product, + offset: str, + duration: timedelta, mnt_filepath, ): from generalresearch.models.thl.user import User |
