about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author stuppie 2026-03-15 14:11:53 -0600
committer stuppie 2026-03-15 14:11:53 -0600
commit 2ca391c7b38c448cc9cd3fff3da5a06d5c2ce3a1 (patch)
tree a7852700c138ba9532e675049e54834b752aa929
parent b1e533a6ac76ad127df0a7bf0bbf507e7938224e (diff)
download generalresearch-2ca391c7b38c448cc9cd3fff3da5a06d5c2ce3a1.tar.gz
generalresearch-2ca391c7b38c448cc9cd3fff3da5a06d5c2ce3a1.zip
django network models: denormalize from ToolRun, add indices, explicit pks, etc
-rw-r--r-- generalresearch/managers/network/mtr.py 6
-rw-r--r-- generalresearch/managers/network/nmap.py 4
-rw-r--r-- generalresearch/managers/network/rdns.py 6
-rw-r--r-- generalresearch/models/network/mtr/features.py 146
-rw-r--r-- generalresearch/models/network/nmap/result.py 1
-rw-r--r-- generalresearch/thl_django/network/models.py 43
6 files changed, 40 insertions, 166 deletions
diff --git a/generalresearch/managers/network/mtr.py b/generalresearch/managers/network/mtr.py
index 35e4871..9e4d773 100644
--- a/generalresearch/managers/network/mtr.py
+++ b/generalresearch/managers/network/mtr.py
@@ -16,11 +16,13 @@ class MTRRunManager(PostgresManager):
"""
INSERT INTO network_mtr (
run_id, source_ip, facility_id,
- protocol, port, parsed
+ protocol, port, parsed,
+ started_at, ip, scan_group_id
)
VALUES (
%(run_id)s, %(source_ip)s, %(facility_id)s,
- %(protocol)s, %(port)s, %(parsed)s
+ %(protocol)s, %(port)s, %(parsed)s,
+ %(started_at)s, %(ip)s, %(scan_group_id)s
);
"""
)
diff --git a/generalresearch/managers/network/nmap.py b/generalresearch/managers/network/nmap.py
index a9e6c56..f26fd44 100644
--- a/generalresearch/managers/network/nmap.py
+++ b/generalresearch/managers/network/nmap.py
@@ -20,14 +20,14 @@ class NmapRunManager(PostgresManager):
host_state_reason, latency_ms, distance,
uptime_seconds, last_boot,
parsed, scan_group_id, open_tcp_ports,
- started_at, ip
+ started_at, ip, open_udp_ports
)
VALUES (
%(run_id)s, %(xml_version)s, %(host_state)s,
%(host_state_reason)s, %(latency_ms)s, %(distance)s,
%(uptime_seconds)s, %(last_boot)s,
%(parsed)s, %(scan_group_id)s, %(open_tcp_ports)s,
- %(started_at)s, %(ip)s
+ %(started_at)s, %(ip)s, %(open_udp_ports)s
);
"""
)
diff --git a/generalresearch/managers/network/rdns.py b/generalresearch/managers/network/rdns.py
index 3543180..41e4138 100644
--- a/generalresearch/managers/network/rdns.py
+++ b/generalresearch/managers/network/rdns.py
@@ -15,11 +15,13 @@ class RDNSRunManager(PostgresManager):
query = """
INSERT INTO network_rdnsresult (
run_id, primary_hostname, primary_domain,
- hostname_count, hostnames
+ hostname_count, hostnames,
+ ip, started_at, scan_group_id
)
VALUES (
%(run_id)s, %(primary_hostname)s, %(primary_domain)s,
- %(hostname_count)s, %(hostnames)s
+ %(hostname_count)s, %(hostnames)s,
+ %(ip)s, %(started_at)s, %(scan_group_id)s
);
"""
params = run.model_dump_postgres()
diff --git a/generalresearch/models/network/mtr/features.py b/generalresearch/models/network/mtr/features.py
deleted file mode 100644
index e7f2ff1..0000000
--- a/generalresearch/models/network/mtr/features.py
+++ /dev/null
@@ -1,146 +0,0 @@
-from typing import List, Dict
-
-from pydantic import BaseModel, ConfigDict, Field
-
-from generalresearch.models.network.definitions import IPKind
-from generalresearch.models.network.mtr import MTRHop
-
-
-class MTRFeatures(BaseModel):
- model_config = ConfigDict()
-
- hop_count: int = Field()
-
- public_hop_count: int
- private_hop_count: int
-
- unique_asn_count: int
- asn_transition_count: int
-
- missing_hop_count: int
- missing_hop_ratio: float
-
- # typical for mobile (vs residential)
- private_hops_after_public: int
-
- has_cgnat: bool
-
-
-def trim_local_hops(hops: List[MTRHop]) -> List[MTRHop]:
- start = 0
- for i, h in enumerate(hops):
- if h.ip_kind == IPKind.PUBLIC:
- start = i
- break
- return hops[start:]
-
-
-def extract_mtr_features(hops: List[MTRHop]) -> Dict[str, float | int | bool | None]:
- features: Dict[str, float | int | bool | None] = {}
-
- if not hops:
- return {}
-
- hops = trim_local_hops(hops)
-
- features["hop_count"] = len(hops)
-
- private_hops = 0
- public_hops = 0
- for h in hops:
- if not h.ip:
- continue
- if h.ip_kind == IPKind.PUBLIC:
- public_hops += 1
- else:
- private_hops += 1
- features["private_hop_count"] = private_hops
- features["public_hop_count"] = public_hops
-
- # -----------------------
- # ASN structure
- # -----------------------
-
- asns = [h.asn for h in hops if h.asn]
-
- features["unique_asn_count"] = len(set(asns))
-
- asn_changes = 0
- for a, b in zip(asns, asns[1:]):
- if a != b:
- asn_changes += 1
-
- features["asn_transition_count"] = asn_changes
-
- # -----------------------
- # Missing hops
- # -----------------------
-
- missing_hops = sum(1 for h in hops if h.ip is None)
-
- features["missing_hop_count"] = missing_hops
- features["missing_hop_ratio"] = missing_hops / len(hops)
-
- # -----------------------
- # Packet loss
- # -----------------------
-
- lossy_hops = sum(1 for h in hops if h.loss_pct > 0)
-
- features["lossy_hop_count"] = lossy_hops
- features["max_loss_pct"] = max(h.loss_pct for h in hops)
-
- # -----------------------
- # Latency stats
- # -----------------------
-
- avg_rtts = [h.avg_ms for h in hops if h.avg_ms > 0]
-
- if avg_rtts:
- features["destination_rtt"] = avg_rtts[-1]
- features["mean_rtt"] = sum(avg_rtts) / len(avg_rtts)
- features["max_rtt"] = max(avg_rtts)
- else:
- features["destination_rtt"] = None
- features["mean_rtt"] = None
- features["max_rtt"] = None
-
- # -----------------------
- # RTT jumps
- # -----------------------
-
- rtt_jumps = []
-
- for a, b in zip(hops, hops[1:]):
- if a.avg_ms > 0 and b.avg_ms > 0:
- rtt_jumps.append(b.avg_ms - a.avg_ms)
-
- if rtt_jumps:
- features["max_rtt_jump"] = max(rtt_jumps)
- features["mean_rtt_jump"] = sum(rtt_jumps) / len(rtt_jumps)
- else:
- features["max_rtt_jump"] = None
- features["mean_rtt_jump"] = None
-
- # -----------------------
- # Jitter
- # -----------------------
-
- stdevs = [h.stdev_ms for h in hops if h.stdev_ms > 0]
-
- if stdevs:
- features["max_jitter"] = max(stdevs)
- features["mean_jitter"] = sum(stdevs) / len(stdevs)
- else:
- features["max_jitter"] = None
- features["mean_jitter"] = None
-
- # -----------------------
- # Route completion
- # -----------------------
-
- last = hops[-1]
-
- features["destination_reached"] = last.ip is not None and last.loss_pct < 100
-
- return features
diff --git a/generalresearch/models/network/nmap/result.py b/generalresearch/models/network/nmap/result.py
index 635db06..3ff2376 100644
--- a/generalresearch/models/network/nmap/result.py
+++ b/generalresearch/models/network/nmap/result.py
@@ -428,4 +428,5 @@ class NmapResult(BaseModel):
d["last_boot"] = self.last_boot
d["parsed"] = self.model_dump_json(indent=0)
d["open_tcp_ports"] = json.dumps(self.tcp_open_ports)
+ d["open_udp_ports"] = json.dumps(self.udp_open_ports)
return d
diff --git a/generalresearch/thl_django/network/models.py b/generalresearch/thl_django/network/models.py
index 2da7071..167af02 100644
--- a/generalresearch/thl_django/network/models.py
+++ b/generalresearch/thl_django/network/models.py
@@ -1,6 +1,6 @@
from uuid import uuid4
from django.utils import timezone
-from django.contrib.postgres.indexes import GistIndex
+from django.contrib.postgres.indexes import GistIndex, GinIndex
from django.db import models
@@ -32,6 +32,8 @@ class ToolRun(models.Model):
Represents one execution of one tool against one target
"""
+ id = models.BigAutoField(primary_key=True)
+
# The *Target* IP.
# Should correspond to an IP we already have in the thl_ipinformation table
ip = models.GenericIPAddressField()
@@ -60,7 +62,7 @@ class ToolRun(models.Model):
null=True,
)
- started_at = models.DateTimeField(auto_now_add=True)
+ started_at = models.DateTimeField()
finished_at = models.DateTimeField(null=True)
class Status(models.TextChoices):
@@ -97,14 +99,21 @@ class RDNSResult(models.Model):
primary_key=True,
)
+ # denormalized from ToolRun for query speed
+ ip = models.GenericIPAddressField()
+ started_at = models.DateTimeField()
+ scan_group_id = models.UUIDField()
+
primary_hostname = models.CharField(max_length=255, null=True)
- primary_domain = models.CharField(max_length=50, null=True)
+ primary_domain = models.CharField(max_length=255, null=True)
hostname_count = models.PositiveIntegerField(default=0)
hostnames = models.JSONField(default=list)
class Meta:
db_table = "network_rdnsresult"
indexes = [
+ models.Index(fields=["ip", "-started_at"]),
+ models.Index(fields=["scan_group_id"]),
models.Index(fields=["primary_hostname"]),
models.Index(fields=["primary_domain"]),
]
@@ -138,17 +147,20 @@ class PortScan(models.Model):
# Can be inferred through a join, but will make common queries easier
open_tcp_ports = models.JSONField(default=list)
+ open_udp_ports = models.JSONField(default=list)
class Meta:
db_table = "network_portscan"
indexes = [
- models.Index(fields=["started_at"]),
models.Index(fields=["scan_group_id"]),
- models.Index(fields=["ip"]),
+ models.Index(fields=["ip", "-started_at"]),
+ GinIndex(fields=["open_tcp_ports"]),
+ GinIndex(fields=["open_udp_ports"]),
]
class PortScanPort(models.Model):
+ id = models.BigAutoField(primary_key=True)
port_scan = models.ForeignKey(
PortScan,
on_delete=models.CASCADE,
@@ -174,13 +186,6 @@ class PortScanPort(models.Model):
fields=["port_scan", "protocol", "port"],
name="unique_port_per_scan",
),
- models.CheckConstraint(
- condition=(
- models.Q(protocol=1, port__isnull=True) # ICMP
- | models.Q(protocol__in=[6, 17], port__isnull=False)
- ),
- name="port_required_for_tcp_udp",
- ),
]
indexes = [
models.Index(fields=["port", "protocol", "state"]),
@@ -197,6 +202,11 @@ class MTR(models.Model):
primary_key=True,
)
+ # denormalized from ToolRun for query speed
+ ip = models.GenericIPAddressField()
+ started_at = models.DateTimeField()
+ scan_group_id = models.UUIDField()
+
# Source performing the trace
source_ip = models.GenericIPAddressField()
facility_id = models.PositiveIntegerField()
@@ -211,9 +221,14 @@ class MTR(models.Model):
class Meta:
db_table = "network_mtr"
+ indexes = [
+ models.Index(fields=["ip", "-started_at"]),
+ models.Index(fields=["scan_group_id"]),
+ ]
class MTRHop(models.Model):
+ id = models.BigAutoField(primary_key=True)
mtr_run = models.ForeignKey(
MTR,
on_delete=models.CASCADE,
@@ -223,7 +238,7 @@ class MTRHop(models.Model):
hop = models.PositiveSmallIntegerField()
ip = models.GenericIPAddressField(null=True)
- domain = models.CharField(max_length=50, null=True)
+ domain = models.CharField(max_length=255, null=True)
asn = models.PositiveIntegerField(null=True)
class Meta:
@@ -247,7 +262,7 @@ class IPLabel(models.Model):
Used for model training and evaluation.
"""
- id = models.BigAutoField(primary_key=True, null=False)
+ id = models.BigAutoField(primary_key=True)
ip = CIDRField()