aboutsummaryrefslogtreecommitdiff
path: root/tests/incite/schemas/test_admin_responses.py
diff options
context:
space:
mode:
authorMax Nanis2026-03-06 16:49:46 -0500
committerMax Nanis2026-03-06 16:49:46 -0500
commit91d040211a4ed6e4157896256a762d3854777b5e (patch)
treecd95922ea4257dc8d3f4e4cbe8534474709a20dc /tests/incite/schemas/test_admin_responses.py
downloadgeneralresearch-91d040211a4ed6e4157896256a762d3854777b5e.tar.gz
generalresearch-91d040211a4ed6e4157896256a762d3854777b5e.zip
Initial commitv3.3.4
Diffstat (limited to 'tests/incite/schemas/test_admin_responses.py')
-rw-r--r--tests/incite/schemas/test_admin_responses.py239
1 files changed, 239 insertions, 0 deletions
diff --git a/tests/incite/schemas/test_admin_responses.py b/tests/incite/schemas/test_admin_responses.py
new file mode 100644
index 0000000..43aa399
--- /dev/null
+++ b/tests/incite/schemas/test_admin_responses.py
@@ -0,0 +1,239 @@
+from datetime import datetime, timezone, timedelta
+from random import sample
+from typing import List
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from generalresearch.incite.schemas import empty_dataframe_from_schema
+from generalresearch.incite.schemas.admin_responses import (
+ AdminPOPSchema,
+ SIX_HOUR_SECONDS,
+)
+from generalresearch.locales import Localelator
+
+
+class TestAdminPOPSchema:
+ schema_df = empty_dataframe_from_schema(AdminPOPSchema)
+ countries = list(Localelator().get_all_countries())[:5]
+ dates = [datetime(year=2024, month=1, day=i, tzinfo=None) for i in range(1, 10)]
+
+ @classmethod
+ def assign_valid_vals(cls, df: pd.DataFrame) -> pd.DataFrame:
+ for c in df.columns:
+ check_attrs: dict = AdminPOPSchema.columns[c].checks[0].statistics
+ df[c] = np.random.randint(
+ check_attrs["min_value"], check_attrs["max_value"], df.shape[0]
+ )
+
+ return df
+
+ def test_empty(self):
+ with pytest.raises(Exception):
+ AdminPOPSchema.validate(pd.DataFrame())
+
+ def test_new_empty_df(self):
+ df = empty_dataframe_from_schema(AdminPOPSchema)
+
+ assert isinstance(df, pd.DataFrame)
+ assert isinstance(df.index, pd.MultiIndex)
+ assert df.columns.size == len(AdminPOPSchema.columns)
+
+ def test_valid(self):
+ # (1) Works with raw naive datetime
+ dates = [
+ datetime(year=2024, month=1, day=i, tzinfo=None).isoformat()
+ for i in range(1, 10)
+ ]
+ df = pd.DataFrame(
+ index=pd.MultiIndex.from_product(
+ iterables=[dates, self.countries], names=["index0", "index1"]
+ ),
+ columns=self.schema_df.columns,
+ )
+ df = self.assign_valid_vals(df)
+
+ df = AdminPOPSchema.validate(df)
+ assert isinstance(df, pd.DataFrame)
+
+ # (2) Works with isoformat naive datetime
+ dates = [datetime(year=2024, month=1, day=i, tzinfo=None) for i in range(1, 10)]
+ df = pd.DataFrame(
+ index=pd.MultiIndex.from_product(
+ iterables=[dates, self.countries], names=["index0", "index1"]
+ ),
+ columns=self.schema_df.columns,
+ )
+ df = self.assign_valid_vals(df)
+
+ df = AdminPOPSchema.validate(df)
+ assert isinstance(df, pd.DataFrame)
+
+ def test_index_tz_parser(self):
+ tz_dates = [
+ datetime(year=2024, month=1, day=i, tzinfo=timezone.utc)
+ for i in range(1, 10)
+ ]
+
+ df = pd.DataFrame(
+ index=pd.MultiIndex.from_product(
+ iterables=[tz_dates, self.countries], names=["index0", "index1"]
+ ),
+ columns=self.schema_df.columns,
+ )
+ df = self.assign_valid_vals(df)
+
+ # Initially, they're all set with a timezone
+ timestmaps: List[pd.Timestamp] = [i for i in df.index.get_level_values(0)]
+ assert all([ts.tz == timezone.utc for ts in timestmaps])
+
+ # After validation, the timezone is removed
+ df = AdminPOPSchema.validate(df)
+ timestmaps: List[pd.Timestamp] = [i for i in df.index.get_level_values(0)]
+ assert all([ts.tz is None for ts in timestmaps])
+
+ def test_index_tz_no_future_beyond_one_year(self):
+ now = datetime.now(tz=timezone.utc)
+ tz_dates = [now + timedelta(days=i * 365) for i in range(1, 10)]
+
+ df = pd.DataFrame(
+ index=pd.MultiIndex.from_product(
+ iterables=[tz_dates, self.countries], names=["index0", "index1"]
+ ),
+ columns=self.schema_df.columns,
+ )
+ df = self.assign_valid_vals(df)
+
+ with pytest.raises(Exception) as cm:
+ AdminPOPSchema.validate(df)
+
+ assert (
+ "Index 'index0' failed element-wise validator "
+ "number 0: less_than(" in str(cm.value)
+ )
+
+ def test_index_only_str(self):
+ # --- float64 to str! ---
+ df = pd.DataFrame(
+ index=pd.MultiIndex.from_product(
+ iterables=[self.dates, np.random.rand(1, 10)[0]],
+ names=["index0", "index1"],
+ ),
+ columns=self.schema_df.columns,
+ )
+ df = self.assign_valid_vals(df)
+
+ vals = [i for i in df.index.get_level_values(1)]
+ assert all([isinstance(v, float) for v in vals])
+
+ df = AdminPOPSchema.validate(df, lazy=True)
+
+ vals = [i for i in df.index.get_level_values(1)]
+ assert all([isinstance(v, str) for v in vals])
+
+ # --- int to str ---
+
+ df = pd.DataFrame(
+ index=pd.MultiIndex.from_product(
+ iterables=[self.dates, sample(range(100), 20)],
+ names=["index0", "index1"],
+ ),
+ columns=self.schema_df.columns,
+ )
+ df = self.assign_valid_vals(df)
+
+ vals = [i for i in df.index.get_level_values(1)]
+ assert all([isinstance(v, int) for v in vals])
+
+ df = AdminPOPSchema.validate(df, lazy=True)
+
+ vals = [i for i in df.index.get_level_values(1)]
+ assert all([isinstance(v, str) for v in vals])
+
+ # a = 1
+ assert isinstance(df, pd.DataFrame)
+
+ def test_invalid_parsing(self):
+ # (1) Timezones AND as strings will still parse correctly
+ tz_str_dates = [
+ datetime(
+ year=2024, month=1, day=1, minute=i, tzinfo=timezone.utc
+ ).isoformat()
+ for i in range(1, 10)
+ ]
+ df = pd.DataFrame(
+ index=pd.MultiIndex.from_product(
+ iterables=[tz_str_dates, self.countries],
+ names=["index0", "index1"],
+ ),
+ columns=self.schema_df.columns,
+ )
+ df = self.assign_valid_vals(df)
+ df = AdminPOPSchema.validate(df, lazy=True)
+
+ assert isinstance(df, pd.DataFrame)
+ timestmaps: List[pd.Timestamp] = [i for i in df.index.get_level_values(0)]
+ assert all([ts.tz is None for ts in timestmaps])
+
+ # (2) Timezones are removed
+ dates = [
+ datetime(year=2024, month=1, day=1, minute=i, tzinfo=timezone.utc)
+ for i in range(1, 10)
+ ]
+ df = pd.DataFrame(
+ index=pd.MultiIndex.from_product(
+ iterables=[dates, self.countries], names=["index0", "index1"]
+ ),
+ columns=self.schema_df.columns,
+ )
+ df = self.assign_valid_vals(df)
+
+ # Has tz before validation, and none after
+ timestmaps: List[pd.Timestamp] = [i for i in df.index.get_level_values(0)]
+ assert all([ts.tz is timezone.utc for ts in timestmaps])
+
+ df = AdminPOPSchema.validate(df, lazy=True)
+
+ timestmaps: List[pd.Timestamp] = [i for i in df.index.get_level_values(0)]
+ assert all([ts.tz is None for ts in timestmaps])
+
+ def test_clipping(self):
+ df = pd.DataFrame(
+ index=pd.MultiIndex.from_product(
+ iterables=[self.dates, self.countries],
+ names=["index0", "index1"],
+ ),
+ columns=self.schema_df.columns,
+ )
+ df = self.assign_valid_vals(df)
+ df = AdminPOPSchema.validate(df)
+ assert df.elapsed_avg.max() < SIX_HOUR_SECONDS
+
+ # Now that we know it's valid, break the elapsed avg
+ df["elapsed_avg"] = np.random.randint(
+ SIX_HOUR_SECONDS, SIX_HOUR_SECONDS + 10_000, df.shape[0]
+ )
+ assert df.elapsed_avg.max() > SIX_HOUR_SECONDS
+
+ # Confirm it doesn't fail if the values are greater, and that
+ # all the values are clipped to the max
+ df = AdminPOPSchema.validate(df)
+ assert df.elapsed_avg.eq(SIX_HOUR_SECONDS).all()
+
+ def test_rounding(self):
+ df = pd.DataFrame(
+ index=pd.MultiIndex.from_product(
+ iterables=[self.dates, self.countries],
+ names=["index0", "index1"],
+ ),
+ columns=self.schema_df.columns,
+ )
+ df = self.assign_valid_vals(df)
+
+ df["payout_avg"] = 2.123456789900002
+
+ assert df.payout_avg.sum() == 95.5555555455001
+
+ df = AdminPOPSchema.validate(df)
+ assert df.payout_avg.sum() == 95.40000000000003