Diffstat (limited to 'jb/models/custom_types.py')
| -rw-r--r-- | jb/models/custom_types.py | 113 |
1 files changed, 113 insertions, 0 deletions
diff --git a/jb/models/custom_types.py b/jb/models/custom_types.py
new file mode 100644
index 0000000..70bc5c1
--- /dev/null
+++ b/jb/models/custom_types.py
@@ -0,0 +1,113 @@
+import re
+from datetime import datetime, timezone
+from typing import Any, Optional
+from uuid import UUID
+
+from pydantic import (
+    AwareDatetime,
+    StringConstraints,
+    TypeAdapter,
+    HttpUrl,
+)
+from pydantic.functional_serializers import PlainSerializer
+from pydantic.functional_validators import AfterValidator, BeforeValidator
+from pydantic.networks import UrlConstraints
+from pydantic_core import Url
+from typing_extensions import Annotated
+
+
+def convert_datetime_to_iso_8601_with_z_suffix(dt: datetime) -> str:
+    # By default, datetimes are serialized with the %f optional. We don't want that because
+    # then the deserialization fails if the datetime didn't have microseconds.
+    return dt.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
+
+
+def convert_str_dt(v: Any) -> Optional[AwareDatetime]:
+    # By default, pydantic is unable to handle tz-aware isoformat str. Attempt to parse a str
+    # that was dumped using the iso8601 format with Z suffix.
+    if v is not None and type(v) is str:
+        assert v.endswith("Z") and "T" in v, "invalid format"
+        return datetime.strptime(v, "%Y-%m-%dT%H:%M:%S.%fZ").replace(
+            tzinfo=timezone.utc
+        )
+    return v
+
+
+def assert_utc(v: AwareDatetime) -> AwareDatetime:
+    if isinstance(v, datetime):
+        assert v.tzinfo == timezone.utc, "Timezone is not UTC"
+    return v
+
+
+# Our custom AwareDatetime that correctly serializes and deserializes
+# to an ISO8601 str with timezone
+AwareDatetimeISO = Annotated[
+    AwareDatetime,
+    BeforeValidator(convert_str_dt),
+    AfterValidator(assert_utc),
+    PlainSerializer(
+        lambda x: x.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
+        when_used="json-unless-none",
+    ),
+]
+
+# ISO 3166-1 alpha-2 (two-letter codes, lowercase)
+# "Like" b/c it matches the format, but we're not explicitly checking
+# it is one of our supported values. See models.thl.locales for that.
+CountryISOLike = Annotated[
+    str, StringConstraints(max_length=2, min_length=2, pattern=r"^[a-z]{2}$")
+]
+# 3-char ISO 639-2/B, lowercase
+LanguageISOLike = Annotated[
+    str, StringConstraints(max_length=3, min_length=3, pattern=r"^[a-z]{3}$")
+]
+
+
+def check_valid_uuid(v: str) -> str:
+    try:
+        assert UUID(v).hex == v
+    except Exception:
+        raise ValueError("Invalid UUID")
+    return v
+
+
+# Our custom field that stores a UUID4 as the .hex string representation
+UUIDStr = Annotated[
+    str,
+    StringConstraints(min_length=32, max_length=32),
+    AfterValidator(check_valid_uuid),
+]
+# Accepts the non-hex representation and coerces
+UUIDStrCoerce = Annotated[
+    str,
+    StringConstraints(min_length=32, max_length=32),
+    BeforeValidator(lambda value: TypeAdapter(UUID).validate_python(value).hex),
+    AfterValidator(check_valid_uuid),
+]
+
+# Same thing as UUIDStr with HttpUrl field. It is confusing that this
+# is not a str https://github.com/pydantic/pydantic/discussions/6395
+HttpUrlStr = Annotated[
+    str,
+    BeforeValidator(lambda value: str(TypeAdapter(HttpUrl).validate_python(value))),
+]
+
+HttpsUrl = Annotated[Url, UrlConstraints(max_length=2083, allowed_schemes=["https"])]
+HttpsUrlStr = Annotated[
+    str,
+    BeforeValidator(lambda value: str(TypeAdapter(HttpsUrl).validate_python(value))),
+]
+
+
+def check_valid_amt_boto3_id(v: str) -> str:
+    # Test ids from amazon have 20 chars
+    if not re.fullmatch(r"[A-Z0-9]{20}|[A-Z0-9]{30}", v):
+        raise ValueError("Invalid AMT Boto3 ID")
+    return v
+
+
+AMTBoto3ID = Annotated[
+    str,
+    StringConstraints(min_length=20, max_length=30),
+    AfterValidator(check_valid_amt_boto3_id),
+]
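For context, a minimal usage sketch (not part of the commit above; the Job model, its field names, and the example values are hypothetical, assuming pydantic v2 and the package path shown in the diff):

    from pydantic import BaseModel

    from jb.models.custom_types import AwareDatetimeISO, HttpUrlStr, UUIDStr


    class Job(BaseModel):
        # Hypothetical model wiring up the custom Annotated types
        id: UUIDStr                   # 32-char lowercase hex UUID string
        created_at: AwareDatetimeISO  # UTC-aware datetime, JSON round-trips with Z suffix
        callback_url: HttpUrlStr      # validated as HttpUrl, stored as plain str


    job = Job(
        id="0b8f8f2a4e6c4f0e9d3b2a1c5d6e7f80",
        created_at="2024-01-02T03:04:05.000000Z",
        callback_url="https://example.com/hook",
    )
    print(job.model_dump_json())
    # created_at serializes back to "2024-01-02T03:04:05.000000Z" via the PlainSerializer

UUIDStrCoerce behaves the same way but also accepts the dashed UUID form, coercing it to the 32-char .hex representation before the hex check runs.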
