aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--generalresearch/thl_django/app/settings.py1
-rw-r--r--generalresearch/thl_django/apps.py1
-rw-r--r--generalresearch/thl_django/fields.py21
-rw-r--r--generalresearch/thl_django/network/__init__.py0
-rw-r--r--generalresearch/thl_django/network/models.py387
5 files changed, 410 insertions, 0 deletions
diff --git a/generalresearch/thl_django/app/settings.py b/generalresearch/thl_django/app/settings.py
index 0d3f47a..050d8ab 100644
--- a/generalresearch/thl_django/app/settings.py
+++ b/generalresearch/thl_django/app/settings.py
@@ -1,6 +1,7 @@
import os
INSTALLED_APPS = [
+ "django.contrib.postgres",
"django.contrib.contenttypes",
"generalresearch.thl_django",
]
diff --git a/generalresearch/thl_django/apps.py b/generalresearch/thl_django/apps.py
index a3a0721..c30e66a 100644
--- a/generalresearch/thl_django/apps.py
+++ b/generalresearch/thl_django/apps.py
@@ -11,5 +11,6 @@ class THLSchemaConfig(AppConfig):
from .contest import models # noqa: F401 # pycharm: keep
from .event import models # noqa: F401 # pycharm: keep
from .marketplace import models # noqa: F401 # pycharm: keep
+ from .network import models # noqa: F401 # pycharm: keep
from .userhealth import models # noqa: F401 # pycharm: keep
from .userprofile import models # noqa: F401 # pycharm: keep
diff --git a/generalresearch/thl_django/fields.py b/generalresearch/thl_django/fields.py
new file mode 100644
index 0000000..5e40ef0
--- /dev/null
+++ b/generalresearch/thl_django/fields.py
@@ -0,0 +1,21 @@
+from django.db import models
+import ipaddress
+
+
+class CIDRField(models.Field):
+ description = "PostgreSQL CIDR network"
+
+ def db_type(self, connection):
+ return "cidr"
+
+ def to_python(self, value):
+ if value is None or isinstance(
+ value, (ipaddress.IPv4Network, ipaddress.IPv6Network)
+ ):
+ return value
+ return ipaddress.ip_network(value, strict=False)
+
+ def get_prep_value(self, value):
+ if value is None:
+ return None
+ return str(ipaddress.ip_network(value, strict=False))
diff --git a/generalresearch/thl_django/network/__init__.py b/generalresearch/thl_django/network/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/generalresearch/thl_django/network/__init__.py
diff --git a/generalresearch/thl_django/network/models.py b/generalresearch/thl_django/network/models.py
new file mode 100644
index 0000000..b0f4cdc
--- /dev/null
+++ b/generalresearch/thl_django/network/models.py
@@ -0,0 +1,387 @@
+from uuid import uuid4
+from django.utils import timezone
+from django.contrib.postgres.indexes import GistIndex
+
+from django.db import models
+
+from generalresearch.thl_django.fields import CIDRField
+
+
+#######
+# ** Signals **
+# ToolRun
+# PortScan
+# PortScanPort
+# RDNSResult
+# Traceroute
+# TracerouteHop
+
+# ** Features **
+# IPFeatureSnapshot
+
+# ** Labels **
+# IPLabel
+
+# ** Predictions **
+# IPPrediction
+#######
+
+
+class ToolRun(models.Model):
+ """
+ Represents one execution of one tool against one target
+ """
+
+ # The *Target* IP.
+ # Should correspond to an IP we already have in the thl_ipinformation table
+ ip = models.GenericIPAddressField()
+
+ # Logical grouping of multiple scans (fast scan + deep scan + rdns + trace, etc.)
+ scan_group_id = models.UUIDField(default=uuid4)
+
+ class ToolClass(models.TextChoices):
+ PORT_SCAN = "port_scan"
+ RDNS = "rdns"
+ PING = "ping"
+ TRACEROUTE = "traceroute"
+
+ tool_class = models.CharField(
+ max_length=32,
+ choices=ToolClass.choices,
+ )
+
+ # Actual binary used (e.g. nmap vs rustmap)
+ tool_name = models.CharField(
+ max_length=64,
+ )
+
+ tool_version = models.CharField(
+ max_length=32,
+ null=True,
+ )
+
+ started_at = models.DateTimeField(auto_now_add=True)
+ finished_at = models.DateTimeField(null=True)
+
+ class Status(models.TextChoices):
+ SUCCESS = "success"
+ FAILED = "failed"
+ TIMEOUT = "timeout"
+ ERROR = "error"
+
+ status = models.CharField(
+ max_length=16,
+ choices=Status.choices,
+ default=Status.SUCCESS,
+ )
+
+ # Raw CLI invocation
+ raw_command = models.TextField()
+ # Parsed arguments / normalized config
+ config = models.JSONField(null=True)
+
+ class Meta:
+ db_table = "network_toolrun"
+ indexes = [
+ models.Index(fields=["started_at"]),
+ models.Index(fields=["scan_group_id"]),
+ models.Index(fields=["ip", "-started_at"]),
+ ]
+
+
+class RDNSResult(models.Model):
+ run = models.OneToOneField(
+ ToolRun,
+ on_delete=models.CASCADE,
+ related_name="rdns",
+ primary_key=True,
+ )
+
+ primary_hostname = models.CharField(max_length=255, null=True)
+ primary_org = models.CharField(max_length=50, null=True)
+ hostname_count = models.PositiveIntegerField(default=0)
+ hostnames = models.JSONField(default=list)
+
+ class Meta:
+ db_table = "network_rdnsresult"
+ indexes = [
+ models.Index(fields=["primary_hostname"]),
+ models.Index(fields=["primary_org"]),
+ ]
+
+
+class PortScan(models.Model):
+ run = models.OneToOneField(
+ ToolRun,
+ on_delete=models.CASCADE,
+ related_name="port_scan",
+ primary_key=True,
+ )
+
+ # denormalized from ToolRun for query speed
+ ip = models.GenericIPAddressField()
+ started_at = models.DateTimeField()
+ scan_group_id = models.UUIDField()
+
+ xml_version = models.CharField(max_length=8)
+ host_state = models.CharField(max_length=16)
+ host_state_reason = models.CharField(max_length=32)
+
+ latency_ms = models.FloatField(null=True)
+ distance = models.IntegerField(null=True)
+
+ uptime_seconds = models.IntegerField(null=True)
+ last_boot = models.DateTimeField(null=True)
+
+ raw_xml = models.TextField(null=True)
+
+ # Full parsed output
+ parsed = models.JSONField()
+
+ # Can be inferred through a join, but will make common queries easier
+ open_tcp_ports = models.JSONField(default=list)
+
+ class Meta:
+ db_table = "network_portscan"
+ indexes = [
+ models.Index(fields=["started_at"]),
+ models.Index(fields=["scan_group_id"]),
+ models.Index(fields=["ip"]),
+ ]
+
+
+class PortScanPort(models.Model):
+ port_scan = models.ForeignKey(
+ PortScan,
+ on_delete=models.CASCADE,
+ related_name="ports",
+ )
+
+ # https://www.iana.org/assignments/protocol-numbers/protocol-numbers.xhtml
+ protocol = models.PositiveSmallIntegerField(default=6)
+ # nullable b/c ICMP doesn't use ports
+ port = models.PositiveIntegerField(null=True)
+
+ state = models.CharField(max_length=20)
+
+ reason = models.CharField(max_length=32, null=True)
+ reason_ttl = models.IntegerField(null=True)
+
+ service_name = models.CharField(max_length=64, null=True)
+
+ class Meta:
+ db_table = "network_portscanport"
+ constraints = [
+ models.UniqueConstraint(
+ fields=["port_scan", "protocol", "port"],
+ name="unique_port_per_scan",
+ ),
+ models.CheckConstraint(
+ condition=(
+ models.Q(protocol=1, port__isnull=True) # ICMP
+ | models.Q(protocol__in=[6, 17], port__isnull=False)
+ ),
+ name="port_required_for_tcp_udp",
+ ),
+ ]
+ indexes = [
+ models.Index(fields=["port", "protocol", "state"]),
+ models.Index(fields=["state"]),
+ models.Index(fields=["service_name"]),
+ ]
+
+
+class Traceroute(models.Model):
+ run = models.OneToOneField(
+ ToolRun,
+ on_delete=models.CASCADE,
+ related_name="traceroute",
+ primary_key=True,
+ )
+
+ # Source performing the trace
+ source_ip = models.GenericIPAddressField()
+ facility_id = models.PositiveIntegerField()
+
+ # IANA protocol numbers (1=ICMP, 6=TCP, 17=UDP)
+ protocol = models.PositiveSmallIntegerField(default=1)
+
+ max_hops = models.PositiveSmallIntegerField()
+
+ # High-level result summary
+ final_responded = models.BooleanField()
+ reached_hop = models.PositiveSmallIntegerField(null=True)
+ total_duration_ms = models.PositiveIntegerField(null=True)
+
+ class Meta:
+ db_table = "network_traceroute"
+
+
+class TracerouteHop(models.Model):
+ traceroute = models.ForeignKey(
+ Traceroute,
+ on_delete=models.CASCADE,
+ related_name="hops",
+ )
+
+ hop_number = models.PositiveSmallIntegerField()
+ probe_number = models.PositiveSmallIntegerField()
+
+ responder_ip = models.GenericIPAddressField(null=True)
+
+ rtt_ms = models.FloatField(null=True)
+
+ ttl = models.PositiveSmallIntegerField(null=True)
+
+ icmp_type = models.PositiveSmallIntegerField(null=True)
+ icmp_code = models.PositiveSmallIntegerField(null=True)
+
+ class Meta:
+ db_table = "network_traceroutehop"
+ constraints = [
+ models.UniqueConstraint(
+ fields=["traceroute", "hop_number", "probe_number"],
+ name="unique_probe_per_hop",
+ )
+ ]
+ indexes = [
+ models.Index(fields=["traceroute", "hop_number"]),
+ models.Index(fields=["responder_ip"]),
+ ]
+ ordering = ["traceroute_id", "hop_number", "probe_number"]
+
+ def __str__(self):
+ return f"{self.traceroute} hop {self.hop_number}.{self.probe_number}"
+
+
+# class TracerouteAnalysis(models.Model):
+# traceroute = models.OneToOneField(
+# Traceroute,
+# on_delete=models.CASCADE,
+# related_name="analysis",
+# primary_key=True,
+# )
+#
+# reached_destination = models.BooleanField()
+#
+# hop_count = models.PositiveSmallIntegerField()
+#
+# latency_spike_detected = models.BooleanField(default=False)
+#
+# max_rtt_ms = models.FloatField(null=True)
+# rtt_stddev = models.FloatField(null=True)
+#
+# last_hop_private = models.BooleanField(default=False)
+# last_hop_asn = models.PositiveIntegerField(null=True)
+#
+# # Deterministic hash of first N hops (binary SHA256 recommended)
+# path_prefix_hash = models.BinaryField(max_length=32, null=True)
+#
+# anomaly_score = models.FloatField(null=True)
+#
+# class Meta:
+# db_table = "network_tracerouteanalysis"
+# indexes = [
+# models.Index(fields=["path_prefix_hash"]),
+# models.Index(fields=["anomaly_score"]),
+# ]
+#
+
+
+class IPLabel(models.Model):
+ """
+ Stores *ground truth* about an IP at a specific time.
+ Used for model training and evaluation.
+ """
+ id = models.BigAutoField(primary_key=True, null=False)
+
+ ip = CIDRField()
+
+ labeled_at = models.DateTimeField(default=timezone.now)
+ created_at = models.DateTimeField(auto_now_add=True)
+
+ label_kind = models.CharField(max_length=32)
+
+ source = models.CharField(max_length=32)
+
+ confidence = models.FloatField(default=1.0)
+
+ provider = models.CharField(
+ max_length=128,
+ null=True,
+ help_text="Proxy/VPN provider if known (e.g. geonode, brightdata)",
+ )
+
+ metadata = models.JSONField(null=True)
+
+ class Meta:
+ db_table = "network_iplabel"
+ indexes = [
+ GistIndex(fields=["ip"]),
+ models.Index(fields=["-labeled_at"]),
+ models.Index(fields=["ip", "-labeled_at"]),
+ models.Index(fields=["label_kind"]),
+ ]
+ constraints = [
+ models.UniqueConstraint(
+ fields=["ip", "label_kind", "source", "labeled_at"],
+ name="unique_ip_label_event",
+ )
+ ]
+
+
+# #########
+# # Below here Copied/pasted from chatgpt, todo: evaluate this
+# #########
+#
+#
+# class IPFeatureSnapshot(models.Model):
+# """
+# Example features:
+# open_proxy_port
+# rdns_residential_score
+# distance
+# asn_type
+# latency
+# mobile_network_likelihood
+# """
+#
+# ip = models.GenericIPAddressField(db_index=True)
+#
+# scan_group_id = models.UUIDField(db_index=True)
+#
+# computed_at = models.DateTimeField(auto_now_add=True)
+#
+# features = models.JSONField()
+#
+# class Meta:
+# db_table = "network_ip_feature_snapshot"
+# indexes = [
+# models.Index(fields=["ip", "-computed_at"]),
+# models.Index(fields=["scan_group_id"]),
+# ]
+#
+#
+# class IPPrediction(models.Model):
+#
+# ip = models.GenericIPAddressField(db_index=True)
+#
+# scan_group_id = models.UUIDField(db_index=True)
+#
+# predicted_at = models.DateTimeField(auto_now_add=True)
+#
+# model_version = models.CharField(max_length=32)
+#
+# risk_score = models.FloatField()
+#
+# feature_scores = models.JSONField()
+#
+# metadata = models.JSONField(default=dict)
+#
+# class Meta:
+# db_table = "network_ip_prediction"
+# indexes = [
+# models.Index(fields=["ip", "-predicted_at"]),
+# models.Index(fields=["scan_group_id"]),
+# models.Index(fields=["risk_score"]),
+# ]