diff options
| -rw-r--r-- | generalresearch/managers/network/mtr.py | 6 | ||||
| -rw-r--r-- | generalresearch/managers/network/nmap.py | 4 | ||||
| -rw-r--r-- | generalresearch/managers/network/rdns.py | 6 | ||||
| -rw-r--r-- | generalresearch/models/network/mtr/features.py | 146 | ||||
| -rw-r--r-- | generalresearch/models/network/nmap/result.py | 1 | ||||
| -rw-r--r-- | generalresearch/thl_django/network/models.py | 43 |
6 files changed, 40 insertions, 166 deletions
diff --git a/generalresearch/managers/network/mtr.py b/generalresearch/managers/network/mtr.py index 35e4871..9e4d773 100644 --- a/generalresearch/managers/network/mtr.py +++ b/generalresearch/managers/network/mtr.py @@ -16,11 +16,13 @@ class MTRRunManager(PostgresManager): """ INSERT INTO network_mtr ( run_id, source_ip, facility_id, - protocol, port, parsed + protocol, port, parsed, + started_at, ip, scan_group_id ) VALUES ( %(run_id)s, %(source_ip)s, %(facility_id)s, - %(protocol)s, %(port)s, %(parsed)s + %(protocol)s, %(port)s, %(parsed)s, + %(started_at)s, %(ip)s, %(scan_group_id)s ); """ ) diff --git a/generalresearch/managers/network/nmap.py b/generalresearch/managers/network/nmap.py index a9e6c56..f26fd44 100644 --- a/generalresearch/managers/network/nmap.py +++ b/generalresearch/managers/network/nmap.py @@ -20,14 +20,14 @@ class NmapRunManager(PostgresManager): host_state_reason, latency_ms, distance, uptime_seconds, last_boot, parsed, scan_group_id, open_tcp_ports, - started_at, ip + started_at, ip, open_udp_ports ) VALUES ( %(run_id)s, %(xml_version)s, %(host_state)s, %(host_state_reason)s, %(latency_ms)s, %(distance)s, %(uptime_seconds)s, %(last_boot)s, %(parsed)s, %(scan_group_id)s, %(open_tcp_ports)s, - %(started_at)s, %(ip)s + %(started_at)s, %(ip)s, %(open_udp_ports)s ); """ ) diff --git a/generalresearch/managers/network/rdns.py b/generalresearch/managers/network/rdns.py index 3543180..41e4138 100644 --- a/generalresearch/managers/network/rdns.py +++ b/generalresearch/managers/network/rdns.py @@ -15,11 +15,13 @@ class RDNSRunManager(PostgresManager): query = """ INSERT INTO network_rdnsresult ( run_id, primary_hostname, primary_domain, - hostname_count, hostnames + hostname_count, hostnames, + ip, started_at, scan_group_id ) VALUES ( %(run_id)s, %(primary_hostname)s, %(primary_domain)s, - %(hostname_count)s, %(hostnames)s + %(hostname_count)s, %(hostnames)s, + %(ip)s, %(started_at)s, %(scan_group_id)s ); """ params = run.model_dump_postgres() diff --git a/generalresearch/models/network/mtr/features.py b/generalresearch/models/network/mtr/features.py deleted file mode 100644 index e7f2ff1..0000000 --- a/generalresearch/models/network/mtr/features.py +++ /dev/null @@ -1,146 +0,0 @@ -from typing import List, Dict - -from pydantic import BaseModel, ConfigDict, Field - -from generalresearch.models.network.definitions import IPKind -from generalresearch.models.network.mtr import MTRHop - - -class MTRFeatures(BaseModel): - model_config = ConfigDict() - - hop_count: int = Field() - - public_hop_count: int - private_hop_count: int - - unique_asn_count: int - asn_transition_count: int - - missing_hop_count: int - missing_hop_ratio: float - - # typical for mobile (vs residential) - private_hops_after_public: int - - has_cgnat: bool - - -def trim_local_hops(hops: List[MTRHop]) -> List[MTRHop]: - start = 0 - for i, h in enumerate(hops): - if h.ip_kind == IPKind.PUBLIC: - start = i - break - return hops[start:] - - -def extract_mtr_features(hops: List[MTRHop]) -> Dict[str, float | int | bool | None]: - features: Dict[str, float | int | bool | None] = {} - - if not hops: - return {} - - hops = trim_local_hops(hops) - - features["hop_count"] = len(hops) - - private_hops = 0 - public_hops = 0 - for h in hops: - if not h.ip: - continue - if h.ip_kind == IPKind.PUBLIC: - public_hops += 1 - else: - private_hops += 1 - features["private_hop_count"] = private_hops - features["public_hop_count"] = public_hops - - # ----------------------- - # ASN structure - # ----------------------- - - asns = [h.asn for h in hops if h.asn] - - features["unique_asn_count"] = len(set(asns)) - - asn_changes = 0 - for a, b in zip(asns, asns[1:]): - if a != b: - asn_changes += 1 - - features["asn_transition_count"] = asn_changes - - # ----------------------- - # Missing hops - # ----------------------- - - missing_hops = sum(1 for h in hops if h.ip is None) - - features["missing_hop_count"] = missing_hops - features["missing_hop_ratio"] = missing_hops / len(hops) - - # ----------------------- - # Packet loss - # ----------------------- - - lossy_hops = sum(1 for h in hops if h.loss_pct > 0) - - features["lossy_hop_count"] = lossy_hops - features["max_loss_pct"] = max(h.loss_pct for h in hops) - - # ----------------------- - # Latency stats - # ----------------------- - - avg_rtts = [h.avg_ms for h in hops if h.avg_ms > 0] - - if avg_rtts: - features["destination_rtt"] = avg_rtts[-1] - features["mean_rtt"] = sum(avg_rtts) / len(avg_rtts) - features["max_rtt"] = max(avg_rtts) - else: - features["destination_rtt"] = None - features["mean_rtt"] = None - features["max_rtt"] = None - - # ----------------------- - # RTT jumps - # ----------------------- - - rtt_jumps = [] - - for a, b in zip(hops, hops[1:]): - if a.avg_ms > 0 and b.avg_ms > 0: - rtt_jumps.append(b.avg_ms - a.avg_ms) - - if rtt_jumps: - features["max_rtt_jump"] = max(rtt_jumps) - features["mean_rtt_jump"] = sum(rtt_jumps) / len(rtt_jumps) - else: - features["max_rtt_jump"] = None - features["mean_rtt_jump"] = None - - # ----------------------- - # Jitter - # ----------------------- - - stdevs = [h.stdev_ms for h in hops if h.stdev_ms > 0] - - if stdevs: - features["max_jitter"] = max(stdevs) - features["mean_jitter"] = sum(stdevs) / len(stdevs) - else: - features["max_jitter"] = None - features["mean_jitter"] = None - - # ----------------------- - # Route completion - # ----------------------- - - last = hops[-1] - - features["destination_reached"] = last.ip is not None and last.loss_pct < 100 - - return features diff --git a/generalresearch/models/network/nmap/result.py b/generalresearch/models/network/nmap/result.py index 635db06..3ff2376 100644 --- a/generalresearch/models/network/nmap/result.py +++ b/generalresearch/models/network/nmap/result.py @@ -428,4 +428,5 @@ class NmapResult(BaseModel): d["last_boot"] = self.last_boot d["parsed"] = self.model_dump_json(indent=0) d["open_tcp_ports"] = json.dumps(self.tcp_open_ports) + d["open_udp_ports"] = json.dumps(self.udp_open_ports) return d diff --git a/generalresearch/thl_django/network/models.py b/generalresearch/thl_django/network/models.py index 2da7071..167af02 100644 --- a/generalresearch/thl_django/network/models.py +++ b/generalresearch/thl_django/network/models.py @@ -1,6 +1,6 @@ from uuid import uuid4 from django.utils import timezone -from django.contrib.postgres.indexes import GistIndex +from django.contrib.postgres.indexes import GistIndex, GinIndex from django.db import models @@ -32,6 +32,8 @@ class ToolRun(models.Model): Represents one execution of one tool against one target """ + id = models.BigAutoField(primary_key=True) + # The *Target* IP. # Should correspond to an IP we already have in the thl_ipinformation table ip = models.GenericIPAddressField() @@ -60,7 +62,7 @@ class ToolRun(models.Model): null=True, ) - started_at = models.DateTimeField(auto_now_add=True) + started_at = models.DateTimeField() finished_at = models.DateTimeField(null=True) class Status(models.TextChoices): @@ -97,14 +99,21 @@ class RDNSResult(models.Model): primary_key=True, ) + # denormalized from ToolRun for query speed + ip = models.GenericIPAddressField() + started_at = models.DateTimeField() + scan_group_id = models.UUIDField() + primary_hostname = models.CharField(max_length=255, null=True) - primary_domain = models.CharField(max_length=50, null=True) + primary_domain = models.CharField(max_length=255, null=True) hostname_count = models.PositiveIntegerField(default=0) hostnames = models.JSONField(default=list) class Meta: db_table = "network_rdnsresult" indexes = [ + models.Index(fields=["ip", "-started_at"]), + models.Index(fields=["scan_group_id"]), models.Index(fields=["primary_hostname"]), models.Index(fields=["primary_domain"]), ] @@ -138,17 +147,20 @@ class PortScan(models.Model): # Can be inferred through a join, but will make common queries easier open_tcp_ports = models.JSONField(default=list) + open_udp_ports = models.JSONField(default=list) class Meta: db_table = "network_portscan" indexes = [ - models.Index(fields=["started_at"]), models.Index(fields=["scan_group_id"]), - models.Index(fields=["ip"]), + models.Index(fields=["ip", "-started_at"]), + GinIndex(fields=["open_tcp_ports"]), + GinIndex(fields=["open_udp_ports"]), ] class PortScanPort(models.Model): + id = models.BigAutoField(primary_key=True) port_scan = models.ForeignKey( PortScan, on_delete=models.CASCADE, @@ -174,13 +186,6 @@ class PortScanPort(models.Model): fields=["port_scan", "protocol", "port"], name="unique_port_per_scan", ), - models.CheckConstraint( - condition=( - models.Q(protocol=1, port__isnull=True) # ICMP - | models.Q(protocol__in=[6, 17], port__isnull=False) - ), - name="port_required_for_tcp_udp", - ), ] indexes = [ models.Index(fields=["port", "protocol", "state"]), @@ -197,6 +202,11 @@ class MTR(models.Model): primary_key=True, ) + # denormalized from ToolRun for query speed + ip = models.GenericIPAddressField() + started_at = models.DateTimeField() + scan_group_id = models.UUIDField() + # Source performing the trace source_ip = models.GenericIPAddressField() facility_id = models.PositiveIntegerField() @@ -211,9 +221,14 @@ class MTR(models.Model): class Meta: db_table = "network_mtr" + indexes = [ + models.Index(fields=["ip", "-started_at"]), + models.Index(fields=["scan_group_id"]), + ] class MTRHop(models.Model): + id = models.BigAutoField(primary_key=True) mtr_run = models.ForeignKey( MTR, on_delete=models.CASCADE, @@ -223,7 +238,7 @@ class MTRHop(models.Model): hop = models.PositiveSmallIntegerField() ip = models.GenericIPAddressField(null=True) - domain = models.CharField(max_length=50, null=True) + domain = models.CharField(max_length=255, null=True) asn = models.PositiveIntegerField(null=True) class Meta: @@ -247,7 +262,7 @@ class IPLabel(models.Model): Used for model training and evaluation. """ - id = models.BigAutoField(primary_key=True, null=False) + id = models.BigAutoField(primary_key=True) ip = CIDRField() |
