diff options
| author | Max Nanis | 2026-03-06 16:49:46 -0500 |
|---|---|---|
| committer | Max Nanis | 2026-03-06 16:49:46 -0500 |
| commit | 91d040211a4ed6e4157896256a762d3854777b5e (patch) | |
| tree | cd95922ea4257dc8d3f4e4cbe8534474709a20dc /tests/incite/__init__.py | |
| download | generalresearch-91d040211a4ed6e4157896256a762d3854777b5e.tar.gz generalresearch-91d040211a4ed6e4157896256a762d3854777b5e.zip | |
Initial commit v3.3.4
Diffstat (limited to 'tests/incite/__init__.py')
| -rw-r--r-- | tests/incite/__init__.py | 137 |
1 file changed, 137 insertions, 0 deletions
diff --git a/tests/incite/__init__.py b/tests/incite/__init__.py new file mode 100644 index 0000000..2f736e8 --- /dev/null +++ b/tests/incite/__init__.py @@ -0,0 +1,137 @@ +# class TestParquetBehaviors(CleanTempDirectoryTestCls): +# wall_coll = WallDFCollection( +# start=GLOBAL_VARS["wall"].start, +# offset="49h", +# archive_path=f"{settings.incite_mount_dir}/raw/df-collections/{DFCollectionType.WALL.value}", +# ) +# +# def test_filters(self): +# # Using REAL data here +# start = datetime(year=2024, month=1, day=15, hour=12, tzinfo=timezone.utc) +# end = datetime(year=2024, month=1, day=15, hour=20, tzinfo=timezone.utc) +# end_max = datetime( +# year=2024, month=1, day=15, hour=20, tzinfo=timezone.utc +# ) + timedelta(hours=2) +# +# ir = pd.Interval(left=pd.Timestamp(start), right=pd.Timestamp(end)) +# wall_items = [w for w in self.wall_coll.items if w.interval.overlaps(ir)] +# ddf = self.wall_coll.ddf( +# items=wall_items, +# include_partial=True, +# force_rr_latest=False, +# columns=["started", "finished"], +# filters=[ +# ("started", ">=", start), +# ("started", "<", end), +# ], +# ) +# +# df = ddf.compute() +# self.assertIsInstance(df, pd.DataFrame) +# +# # No started=None, and they're all between the started and the end +# self.assertFalse(df.started.isna().any()) +# self.assertFalse((df.started < start).any()) +# self.assertFalse((df.started > end).any()) +# +# # Has finished=None and finished=time, so +# # the finished is all between the started and +# # the end_max +# self.assertTrue(df.finished.isna().any()) +# self.assertTrue((df.finished.dt.year == 2024).any()) +# +# self.assertFalse((df.finished > end_max).any()) +# self.assertFalse((df.finished < start).any()) +# +# # def test_user_id_list(self): +# # # Calling compute turns it into a np.ndarray +# # user_ids = self.instance.ddf( +# # columns=["user_id"] +# # ).user_id.unique().values.compute() +# # self.assertIsInstance(user_ids, np.ndarray) +# # +# # # If ddf filters work with ndarray +# # 
user_product_merge = <todo: assign> +# # +# # with self.assertRaises(TypeError) as cm: +# # user_product_merge.ddf( +# # filters=[("id", "in", user_ids)]) +# # self.assertIn("Value of 'in' filter must be a list, set or tuple.", str(cm.exception)) +# # +# # # No compute == dask array +# # user_ids = self.instance.ddf( +# # columns=["user_id"] +# # ).user_id.unique().values +# # self.assertIsInstance(user_ids, da.Array) +# # +# # with self.assertRaises(TypeError) as cm: +# # user_product_merge.ddf( +# # filters=[("id", "in", user_ids)]) +# # self.assertIn("Value of 'in' filter must be a list, set or tuple.", str(cm.exception)) +# # +# # # pick a product_id (most active one) +# # self.product_id = instance.df.product_id.value_counts().index[0] +# # self.expected_columns: int = len(instance._schema.columns) +# # self.instance = instance +# +# # def test_basic(self): +# # # now try to load up the data! +# # self.instance.grouped_key = self.product_id +# # +# # # Confirm any of the items are archived +# # self.assertTrue(self.instance.progress.has_archive.eq(True).any()) +# # +# # # Confirm it returns a df +# # df = self.instance.dd().compute() +# # +# # self.assertFalse(df.empty) +# # self.assertEqual(df.shape[1], self.expected_columns) +# # self.assertGreater(df.shape[0], 1) +# # +# # # Confirm that DF only contains this product_id +# # self.assertEqual(df[df.product_id == self.product_id].shape, df.shape) +# +# # def test_god_vs_product_id(self): +# # self.instance.grouped_key = self.product_id +# # df_product_origin = self.instance.dd(columns=None, filters=None).compute() +# # +# # self.instance.grouped_key = None +# # df_god_origin = self.instance.dd(columns=None, +# # filters=[("product_id", "==", self.product_id)]).compute() +# # +# # self.assertTrue(df_god_origin.equals(df_product_origin)) +# +# # +# # instance = POPSessionMerge( +# # start=START, +# # archive_path=self.PATH, +# # group_by="product_id" +# # ) +# # instance.build(U=GLOBAL_VARS["user"], 
S=GLOBAL_VARS["session"], W=GLOBAL_VARS["wall"]) +# # instance.save(god_only=False) +# # +# # # pick a product_id (most active one) +# # self.product_id = instance.df.product_id.value_counts().index[0] +# # self.expected_columns: int = len(instance._schema.columns) +# # self.instance = instance +# +# +# class TestValidItem(CleanTempDirectoryTestCls): +# +# def test_interval(self): +# for k in GLOBAL_VARS.keys(): +# coll = GLOBAL_VARS[k] +# item = coll.items[0] +# ir = item.interval +# +# self.assertIsInstance(ir, pd.Interval) +# self.assertLess(a=ir.left, b=ir.right) +# +# def test_str(self): +# for k in GLOBAL_VARS.keys(): +# coll = GLOBAL_VARS[k] +# item = coll.items[0] +# +# offset = coll.offset or "–" +# +# self.assertIn(offset, str(item)) |
