diff options
50 files changed, 3493 insertions, 11 deletions
diff --git a/generalresearch/managers/network/__init__.py b/generalresearch/managers/network/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/generalresearch/managers/network/__init__.py diff --git a/generalresearch/managers/network/label.py b/generalresearch/managers/network/label.py new file mode 100644 index 0000000..65c63e5 --- /dev/null +++ b/generalresearch/managers/network/label.py @@ -0,0 +1,147 @@ +from datetime import datetime, timezone, timedelta +from typing import Collection, Optional, List + +from psycopg import sql +from pydantic import TypeAdapter, IPvAnyNetwork + +from generalresearch.managers.base import PostgresManager +from generalresearch.models.custom_types import ( + AwareDatetimeISO, + IPvAnyAddressStr, + IPvAnyNetworkStr, +) +from generalresearch.models.network.label import IPLabel, IPLabelKind, IPLabelSource + + +class IPLabelManager(PostgresManager): + def create(self, ip_label: IPLabel) -> IPLabel: + query = sql.SQL( + """ + INSERT INTO network_iplabel ( + ip, labeled_at, created_at, + label_kind, source, confidence, + provider, metadata + ) VALUES ( + %(ip)s, %(labeled_at)s, %(created_at)s, + %(label_kind)s, %(source)s, %(confidence)s, + %(provider)s, %(metadata)s + ) RETURNING id;""" + ) + params = ip_label.model_dump_postgres() + with self.pg_config.make_connection() as conn: + with conn.cursor() as c: + c.execute(query, params) + pk = c.fetchone()["id"] + return ip_label + + def make_filter_str( + self, + ips: Optional[Collection[IPvAnyNetworkStr]] = None, + ip_in_network: Optional[IPvAnyAddressStr] = None, + label_kind: Optional[IPLabelKind] = None, + source: Optional[IPLabelSource] = None, + labeled_at: Optional[AwareDatetimeISO] = None, + labeled_after: Optional[AwareDatetimeISO] = None, + labeled_before: Optional[AwareDatetimeISO] = None, + provider: Optional[str] = None, + ): + filters = [] + params = {} + if labeled_after or labeled_before: + time_end = labeled_before or datetime.now(tz=timezone.utc) 
+ time_start = labeled_after or datetime(2017, 1, 1, tzinfo=timezone.utc) + assert time_start.tzinfo.utcoffset(time_start) == timedelta(), "must be UTC" + assert time_end.tzinfo.utcoffset(time_end) == timedelta(), "must be UTC" + filters.append("labeled_at BETWEEN %(time_start)s AND %(time_end)s") + params["time_start"] = time_start + params["time_end"] = time_end + if labeled_at: + assert labeled_at.tzinfo.utcoffset(labeled_at) == timedelta(), "must be UTC" + filters.append("labeled_at = %(labeled_at)s") + params["labeled_at"] = labeled_at + if label_kind: + filters.append("label_kind = %(label_kind)s") + params["label_kind"] = label_kind.value + if source: + filters.append("source = %(source)s") + params["source"] = source.value + if provider: + filters.append("provider = %(provider)s") + params["provider"] = provider + if ips is not None: + filters.append("ip = ANY(%(ips)s)") + params["ips"] = list(ips) + if ip_in_network: + """ + Return matching networks. + e.g. ip = '13f9:c462:e039:a38c::1', might return rows + where ip = '13f9:c462:e039::/48' or '13f9:c462:e039:a38c::/64' + """ + filters.append("ip >>= %(ip_in_network)s") + params["ip_in_network"] = ip_in_network + + filter_str = "WHERE " + " AND ".join(filters) if filters else "" + return filter_str, params + + def filter( + self, + ips: Optional[Collection[IPvAnyNetworkStr]] = None, + ip_in_network: Optional[IPvAnyAddressStr] = None, + label_kind: Optional[IPLabelKind] = None, + source: Optional[IPLabelSource] = None, + labeled_at: Optional[AwareDatetimeISO] = None, + labeled_after: Optional[AwareDatetimeISO] = None, + labeled_before: Optional[AwareDatetimeISO] = None, + provider: Optional[str] = None, + ) -> List[IPLabel]: + filter_str, params = self.make_filter_str( + ips=ips, + ip_in_network=ip_in_network, + label_kind=label_kind, + source=source, + labeled_at=labeled_at, + labeled_after=labeled_after, + labeled_before=labeled_before, + provider=provider, + ) + query = f""" + SELECT + ip, labeled_at, 
created_at, + label_kind, source, confidence, + provider, metadata + FROM network_iplabel + {filter_str} + """ + res = self.pg_config.execute_sql_query(query, params) + return [IPLabel.model_validate(rec) for rec in res] + + def get_most_specific_matching_network(self, ip: IPvAnyAddressStr) -> IPvAnyNetwork: + """ + e.g. ip = 'b5f4:dc2:f136:70d5:5b6e:9a85:c7d4:3517', might return + 'b5f4:dc2:f136:70d5::/64' + """ + ip = TypeAdapter(IPvAnyAddressStr).validate_python(ip) + + query = """ + SELECT ip + FROM network_iplabel + WHERE ip >>= %(ip)s + ORDER BY masklen(ip) DESC + LIMIT 1;""" + res = self.pg_config.execute_sql_query(query, {"ip": ip}) + if res: + return IPvAnyNetwork(res[0]["ip"]) + + def test_join(self, ip): + query = """ + SELECT + to_jsonb(i) AS ipinfo, + to_jsonb(l) AS iplabel + FROM thl_ipinformation i + LEFT JOIN network_iplabel l + ON l.ip >>= i.ip + WHERE i.ip = %(ip)s + ORDER BY masklen(l.ip) DESC;""" + params = {"ip": ip} + res = self.pg_config.execute_sql_query(query, params) + return res diff --git a/generalresearch/managers/network/mtr.py b/generalresearch/managers/network/mtr.py new file mode 100644 index 0000000..9e4d773 --- /dev/null +++ b/generalresearch/managers/network/mtr.py @@ -0,0 +1,53 @@ +from typing import Optional + +from psycopg import Cursor, sql + +from generalresearch.managers.base import PostgresManager +from generalresearch.models.network.tool_run import MTRRun + + +class MTRRunManager(PostgresManager): + + def _create(self, run: MTRRun, c: Optional[Cursor] = None) -> None: + """ + Do not use this directly. 
Must only be used in the context of a toolrun + """ + query = sql.SQL( + """ + INSERT INTO network_mtr ( + run_id, source_ip, facility_id, + protocol, port, parsed, + started_at, ip, scan_group_id + ) + VALUES ( + %(run_id)s, %(source_ip)s, %(facility_id)s, + %(protocol)s, %(port)s, %(parsed)s, + %(started_at)s, %(ip)s, %(scan_group_id)s + ); + """ + ) + params = run.model_dump_postgres() + + query_hops = sql.SQL( + """ + INSERT INTO network_mtrhop ( + hop, ip, domain, asn, mtr_run_id + ) VALUES ( + %(hop)s, %(ip)s, %(domain)s, + %(asn)s, %(mtr_run_id)s + ) + """ + ) + mtr_run = run.parsed + params_hops = [h.model_dump_postgres(run_id=run.id) for h in mtr_run.hops] + + if c: + c.execute(query, params) + if params_hops: + c.executemany(query_hops, params_hops) + else: + with self.pg_config.make_connection() as conn: + with conn.cursor() as c: + c.execute(query, params) + if params_hops: + c.executemany(query_hops, params_hops) diff --git a/generalresearch/managers/network/nmap.py b/generalresearch/managers/network/nmap.py new file mode 100644 index 0000000..f26fd44 --- /dev/null +++ b/generalresearch/managers/network/nmap.py @@ -0,0 +1,63 @@ +from typing import Optional + +from psycopg import Cursor, sql + +from generalresearch.managers.base import PostgresManager +from generalresearch.models.network.tool_run import NmapRun + + +class NmapRunManager(PostgresManager): + + def _create(self, run: NmapRun, c: Optional[Cursor] = None) -> None: + """ + Insert a PortScan + PortScanPorts from a Pydantic NmapResult. + Do not use this directly. 
Must only be used in the context of a toolrun + """ + query = sql.SQL( + """ + INSERT INTO network_portscan ( + run_id, xml_version, host_state, + host_state_reason, latency_ms, distance, + uptime_seconds, last_boot, + parsed, scan_group_id, open_tcp_ports, + started_at, ip, open_udp_ports + ) + VALUES ( + %(run_id)s, %(xml_version)s, %(host_state)s, + %(host_state_reason)s, %(latency_ms)s, %(distance)s, + %(uptime_seconds)s, %(last_boot)s, + %(parsed)s, %(scan_group_id)s, %(open_tcp_ports)s, + %(started_at)s, %(ip)s, %(open_udp_ports)s + ); + """ + ) + params = run.model_dump_postgres() + + query_ports = sql.SQL( + """ + INSERT INTO network_portscanport ( + port_scan_id, protocol, port, + state, reason, reason_ttl, + service_name + ) VALUES ( + %(port_scan_id)s, %(protocol)s, %(port)s, + %(state)s, %(reason)s, %(reason_ttl)s, + %(service_name)s + ) + """ + ) + nmap_run = run.parsed + params_ports = [p.model_dump_postgres(run_id=run.id) for p in nmap_run.ports] + + if c: + c.execute(query, params) + if nmap_run.ports: + c.executemany(query_ports, params_ports) + else: + with self.pg_config.make_connection() as conn: + with conn.cursor() as c: + c.execute(query, params) + if nmap_run.ports: + c.executemany(query_ports, params_ports) + + return None diff --git a/generalresearch/managers/network/rdns.py b/generalresearch/managers/network/rdns.py new file mode 100644 index 0000000..41e4138 --- /dev/null +++ b/generalresearch/managers/network/rdns.py @@ -0,0 +1,33 @@ +from typing import Optional + +from psycopg import Cursor + +from generalresearch.managers.base import PostgresManager +from generalresearch.models.network.tool_run import RDNSRun + + +class RDNSRunManager(PostgresManager): + + def _create(self, run: RDNSRun, c: Optional[Cursor] = None) -> None: + """ + Do not use this directly. 
Must only be used in the context of a toolrun + """ + query = """ + INSERT INTO network_rdnsresult ( + run_id, primary_hostname, primary_domain, + hostname_count, hostnames, + ip, started_at, scan_group_id + ) + VALUES ( + %(run_id)s, %(primary_hostname)s, %(primary_domain)s, + %(hostname_count)s, %(hostnames)s, + %(ip)s, %(started_at)s, %(scan_group_id)s + ); + """ + params = run.model_dump_postgres() + if c: + c.execute(query, params) + else: + with self.pg_config.make_connection() as conn: + with conn.cursor() as c: + c.execute(query, params) diff --git a/generalresearch/managers/network/tool_run.py b/generalresearch/managers/network/tool_run.py new file mode 100644 index 0000000..17f4935 --- /dev/null +++ b/generalresearch/managers/network/tool_run.py @@ -0,0 +1,139 @@ +from typing import Collection, List, Dict + +from psycopg import Cursor, sql + +from generalresearch.managers.base import PostgresManager, Permission + +from generalresearch.managers.network.nmap import NmapRunManager +from generalresearch.managers.network.rdns import RDNSRunManager +from generalresearch.managers.network.mtr import MTRRunManager +from generalresearch.models.network.rdns.result import RDNSResult +from generalresearch.models.network.tool_run import ( + NmapRun, + RDNSRun, + MTRRun, + ToolRun, + ToolName, +) +from generalresearch.pg_helper import PostgresConfig + + +class ToolRunManager(PostgresManager): + def __init__( + self, + pg_config: PostgresConfig, + permissions: Collection[Permission] = None, + ): + super().__init__(pg_config=pg_config, permissions=permissions) + self.nmap_manager = NmapRunManager(self.pg_config) + self.rdns_manager = RDNSRunManager(self.pg_config) + self.mtr_manager = MTRRunManager(self.pg_config) + + def _create_tool_run(self, run: NmapRun | RDNSRun | MTRRun, c: Cursor): + query = sql.SQL( + """ + INSERT INTO network_toolrun ( + ip, scan_group_id, tool_class, + tool_name, tool_version, started_at, + finished_at, status, raw_command, + config + ) + VALUES 
( + %(ip)s, %(scan_group_id)s, %(tool_class)s, + %(tool_name)s, %(tool_version)s, %(started_at)s, + %(finished_at)s, %(status)s, %(raw_command)s, + %(config)s + ) RETURNING id; + """ + ) + params = run.model_dump_postgres() + c.execute(query, params) + run_id = c.fetchone()["id"] + run.id = run_id + return None + + def create_tool_run(self, run: NmapRun | RDNSRun | MTRRun): + if type(run) is NmapRun: + return self.create_nmap_run(run) + elif type(run) is RDNSRun: + return self.create_rdns_run(run) + elif type(run) is MTRRun: + return self.create_mtr_run(run) + else: + raise ValueError("unrecognized run type") + + def get_latest_runs_by_tool(self, ip: str) -> Dict[ToolName, ToolRun]: + query = """ + SELECT DISTINCT ON (tool_name) * + FROM network_toolrun + WHERE ip = %(ip)s + ORDER BY tool_name, started_at DESC; + """ + params = {"ip": ip} + res = self.pg_config.execute_sql_query(query, params=params) + runs = [ToolRun.model_validate(x) for x in res] + return {r.tool_name: r for r in runs} + + def create_nmap_run(self, run: NmapRun) -> NmapRun: + """ + Insert a PortScan + PortScanPorts from a Pydantic NmapResult. 
+ """ + with self.pg_config.make_connection() as conn: + with conn.cursor() as c: + self._create_tool_run(run, c) + self.nmap_manager._create(run, c=c) + return run + + def get_nmap_run(self, id: int) -> NmapRun: + query = """ + SELECT tr.*, np.parsed + FROM network_toolrun tr + JOIN network_portscan np ON tr.id = np.run_id + WHERE id = %(id)s + """ + params = {"id": id} + res = self.pg_config.execute_sql_query(query, params)[0] + return NmapRun.model_validate(res) + + def create_rdns_run(self, run: RDNSRun) -> RDNSRun: + """ + Insert a RDnsRun + RDNSResult + """ + with self.pg_config.make_connection() as conn: + with conn.cursor() as c: + self._create_tool_run(run, c) + self.rdns_manager._create(run, c=c) + return run + + def get_rdns_run(self, id: int) -> RDNSRun: + query = """ + SELECT tr.*, hostnames + FROM network_toolrun tr + JOIN network_rdnsresult np ON tr.id = np.run_id + WHERE id = %(id)s + """ + params = {"id": id} + res = self.pg_config.execute_sql_query(query, params)[0] + parsed = RDNSResult.model_validate( + {"ip": res["ip"], "hostnames": res["hostnames"]} + ) + res["parsed"] = parsed + return RDNSRun.model_validate(res) + + def create_mtr_run(self, run: MTRRun) -> MTRRun: + with self.pg_config.make_connection() as conn: + with conn.cursor() as c: + self._create_tool_run(run, c) + self.mtr_manager._create(run, c=c) + return run + + def get_mtr_run(self, id: int) -> MTRRun: + query = """ + SELECT tr.*, mtr.parsed, mtr.source_ip, mtr.facility_id + FROM network_toolrun tr + JOIN network_mtr mtr ON tr.id = mtr.run_id + WHERE id = %(id)s + """ + params = {"id": id} + res = self.pg_config.execute_sql_query(query, params)[0] + return MTRRun.model_validate(res) diff --git a/generalresearch/models/custom_types.py b/generalresearch/models/custom_types.py index aefbbe9..ea96741 100644 --- a/generalresearch/models/custom_types.py +++ b/generalresearch/models/custom_types.py @@ -14,7 +14,7 @@ from pydantic import ( ) from pydantic.functional_serializers import 
PlainSerializer from pydantic.functional_validators import AfterValidator, BeforeValidator -from pydantic.networks import UrlConstraints +from pydantic.networks import UrlConstraints, IPvAnyNetwork from pydantic_core import Url from typing_extensions import Annotated @@ -121,13 +121,19 @@ HttpsUrlStr = Annotated[ BeforeValidator(lambda value: str(TypeAdapter(HttpsUrl).validate_python(value))), ] -# Same thing as UUIDStr with IPvAnyAddress field. It is confusing that this is not a str +# Same thing as UUIDStr with IPvAnyAddress field IPvAnyAddressStr = Annotated[ str, BeforeValidator( lambda value: str(TypeAdapter(IPvAnyAddress).validate_python(value).exploded) ), ] +IPvAnyNetworkStr = Annotated[ + str, + BeforeValidator( + lambda value: str(TypeAdapter(IPvAnyNetwork).validate_python(value)) + ), +] def coerce_int_to_str(data: Any) -> Any: @@ -279,3 +285,7 @@ PropertyCode = Annotated[ pattern=r"^[a-z]{1,2}\:.*", ), ] + + +def now_utc_factory(): + return datetime.now(tz=timezone.utc) diff --git a/generalresearch/models/network/__init__.py b/generalresearch/models/network/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/generalresearch/models/network/__init__.py diff --git a/generalresearch/models/network/definitions.py b/generalresearch/models/network/definitions.py new file mode 100644 index 0000000..4fb44f4 --- /dev/null +++ b/generalresearch/models/network/definitions.py @@ -0,0 +1,69 @@ +from enum import StrEnum +from ipaddress import ip_address, ip_network +from typing import Optional + +CGNAT_NET = ip_network("100.64.0.0/10") + + +class IPProtocol(StrEnum): + TCP = "tcp" + UDP = "udp" + SCTP = "sctp" + IP = "ip" + ICMP = "icmp" + ICMPv6 = "icmpv6" + + def to_number(self) -> int: + # https://www.iana.org/assignments/protocol-numbers/protocol-numbers.xhtml + return { + self.TCP: 6, + self.UDP: 17, + self.SCTP: 132, + self.IP: 4, + self.ICMP: 1, + self.ICMPv6: 58, + }[self] + + +class IPKind(StrEnum): + PUBLIC = "public" + PRIVATE = 
"private" + CGNAT = "carrier_nat" + LOOPBACK = "loopback" + LINK_LOCAL = "link_local" + MULTICAST = "multicast" + RESERVED = "reserved" + UNSPECIFIED = "unspecified" + + +def get_ip_kind(ip: Optional[str]) -> Optional[IPKind]: + if not ip: + return None + + ip_obj = ip_address(ip) + + if ip_obj in CGNAT_NET: + return IPKind.CGNAT + + if ip_obj.is_loopback: + return IPKind.LOOPBACK + + if ip_obj.is_link_local: + return IPKind.LINK_LOCAL + + if ip_obj.is_multicast: + return IPKind.MULTICAST + + if ip_obj.is_unspecified: + return IPKind.UNSPECIFIED + + if ip_obj.is_private: + return IPKind.PRIVATE + + if ip_obj.is_reserved: + return IPKind.RESERVED + + if ip_obj.is_global: + return IPKind.PUBLIC + + return None diff --git a/generalresearch/models/network/label.py b/generalresearch/models/network/label.py new file mode 100644 index 0000000..b9a7659 --- /dev/null +++ b/generalresearch/models/network/label.py @@ -0,0 +1,126 @@ +from __future__ import annotations + +import ipaddress +from enum import StrEnum +from typing import Optional, List + +from pydantic import ( + BaseModel, + Field, + computed_field, + field_validator, + ConfigDict, + IPvAnyNetwork, +) + +from generalresearch.models.custom_types import ( + AwareDatetimeISO, + now_utc_factory, +) + + +class IPTrustClass(StrEnum): + TRUSTED = "trusted" + UNTRUSTED = "untrusted" + # Note: use case of unknown is for e.g. Spur says this IP is a residential proxy + # on 2026-1-1, and then has no annotation a month later. It doesn't mean + # the IP is TRUSTED, but we want to record that Spur now doesn't claim UNTRUSTED. 
+ UNKNOWN = "unknown" + + +class IPLabelKind(StrEnum): + # --- UNTRUSTED --- + RESIDENTIAL_PROXY = "residential_proxy" + DATACENTER_PROXY = "datacenter_proxy" + ISP_PROXY = "isp_proxy" + MOBILE_PROXY = "mobile_proxy" + PROXY = "proxy" + HOSTING = "hosting" + VPN = "vpn" + RELAY = "relay" + TOR_EXIT = "tor_exit" + BAD_ACTOR = "bad_actor" + # --- TRUSTED --- + TRUSTED_USER = "trusted_user" + # --- UNKNOWN --- + UNKNOWN = "unknown" + + +class IPLabelSource(StrEnum): + # We got this IP from our own use of a proxy service + INTERNAL_USE = "internal_use" + + # An external "security" service flagged this IP + SPUR = "spur" + IPINFO = "ipinfo" + MAXMIND = "maxmind" + + MANUAL = "manual" + + +class IPLabel(BaseModel): + """ + Stores *ground truth* about an IP at a specific time. + To be used for model training and evaluation. + """ + + model_config = ConfigDict(validate_assignment=True) + + ip: IPvAnyNetwork = Field() + + labeled_at: AwareDatetimeISO = Field(default_factory=now_utc_factory) + created_at: Optional[AwareDatetimeISO] = Field(default=None) + + label_kind: IPLabelKind = Field() + source: IPLabelSource = Field() + + confidence: float = Field(default=1.0, ge=0.0, le=1.0) + + # Optionally, if this is untrusted, which service is providing the proxy/vpn service + provider: Optional[str] = Field( + default=None, examples=["geonode", "gecko"], max_length=128 + ) + + metadata: Optional[IPLabelMetadata] = Field(default=None) + + @field_validator("ip", mode="before") + @classmethod + def normalize_and_validate_network(cls, v): + net = ipaddress.ip_network(v, strict=False) + + if isinstance(net, ipaddress.IPv6Network): + if net.prefixlen > 64: + raise ValueError("IPv6 network must be /64 or larger") + + return net + + @field_validator("provider", mode="before") + @classmethod + def provider_format(cls, v: Optional[str]) -> Optional[str]: + if v is None: + return v + return v.lower().strip() + + @computed_field() + @property + def trust_class(self) -> IPTrustClass: + if 
self.label_kind == IPLabelKind.UNKNOWN: + return IPTrustClass.UNKNOWN + if self.label_kind == IPLabelKind.TRUSTED_USER: + return IPTrustClass.TRUSTED + return IPTrustClass.UNTRUSTED + + def model_dump_postgres(self): + d = self.model_dump(mode="json") + d["metadata"] = self.metadata.model_dump_json() if self.metadata else None + return d + + +class IPLabelMetadata(BaseModel): + """ + To be expanded. Just for storing some things from Spur for now + """ + + model_config = ConfigDict(validate_assignment=True, extra="allow") + + services: Optional[List[str]] = Field(min_length=1, examples=[["RDP"]]) diff --git a/generalresearch/models/network/mtr/__init__.py b/generalresearch/models/network/mtr/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/generalresearch/models/network/mtr/__init__.py diff --git a/generalresearch/models/network/mtr/command.py b/generalresearch/models/network/mtr/command.py new file mode 100644 index 0000000..f8d2d49 --- /dev/null +++ b/generalresearch/models/network/mtr/command.py @@ -0,0 +1,70 @@ +import subprocess +from typing import List, Optional + +from generalresearch.models.network.definitions import IPProtocol +from generalresearch.models.network.mtr.parser import parse_mtr_output +from generalresearch.models.network.mtr.result import MTRResult +from generalresearch.models.network.tool_run_command import MTRRunCommand + +SUPPORTED_PROTOCOLS = { + IPProtocol.TCP, + IPProtocol.UDP, + IPProtocol.SCTP, + IPProtocol.ICMP, +} +PROTOCOLS_W_PORT = {IPProtocol.TCP, IPProtocol.UDP, IPProtocol.SCTP} + + +def build_mtr_command( + ip: str, + protocol: Optional[IPProtocol] = None, + port: Optional[int] = None, + report_cycles: int = 10, +) -> str: + # https://manpages.ubuntu.com/manpages/focal/man8/mtr.8.html + # e.g. 
"mtr -r -c 2 -b -z -j -T -P 443 74.139.70.149" + args = ["mtr", "--report", "--show-ips", "--aslookup", "--json"] + if report_cycles is not None: + args.extend(["-c", str(int(report_cycles))]) + if port is not None: + if protocol is None: + protocol = IPProtocol.TCP + assert protocol in PROTOCOLS_W_PORT, "port only allowed for TCP/SCTP/UDP traces" + args.extend(["--port", str(int(port))]) + if protocol: + assert protocol in SUPPORTED_PROTOCOLS, f"unsupported protocol: {protocol}" + # default is ICMP (no args) + arg_map = { + IPProtocol.TCP: "--tcp", + IPProtocol.UDP: "--udp", + IPProtocol.SCTP: "--sctp", + } + if protocol in arg_map: + args.append(arg_map[protocol]) + args.append(ip) + return " ".join(args) + + +def get_mtr_version() -> str: + proc = subprocess.run( + ["mtr", "-v"], + capture_output=True, + text=True, + check=False, + ) + # e.g. mtr 0.95 + ver_str = proc.stdout.strip() + return ver_str.split(" ", 1)[1] + + +def run_mtr(config: MTRRunCommand) -> MTRResult: + cmd = config.to_command_str() + args = cmd.split(" ") + proc = subprocess.run( + args, + capture_output=True, + text=True, + check=False, + ) + raw = proc.stdout.strip() + return parse_mtr_output(raw, protocol=config.options.protocol, port=config.options.port) diff --git a/generalresearch/models/network/mtr/execute.py b/generalresearch/models/network/mtr/execute.py new file mode 100644 index 0000000..a6fb82a --- /dev/null +++ b/generalresearch/models/network/mtr/execute.py @@ -0,0 +1,55 @@ +from datetime import datetime, timezone +from typing import Optional +from uuid import uuid4 + +from generalresearch.models.custom_types import UUIDStr +from generalresearch.models.network.definitions import IPProtocol +from generalresearch.models.network.mtr.command import ( + run_mtr, + get_mtr_version, + build_mtr_command, +) +from generalresearch.models.network.tool_run import MTRRun, ToolName, ToolClass, Status +from generalresearch.models.network.tool_run_command import ( + MTRRunCommand, + 
MTRRunCommandOptions, +) +from generalresearch.models.network.utils import get_source_ip + + +def execute_mtr( + ip: str, + scan_group_id: Optional[UUIDStr] = None, + protocol: Optional[IPProtocol] = IPProtocol.ICMP, + port: Optional[int] = None, + report_cycles: int = 10, +) -> MTRRun: + config = MTRRunCommand( + options=MTRRunCommandOptions( + ip=ip, + report_cycles=report_cycles, + protocol=protocol, + port=port, + ), + ) + + started_at = datetime.now(tz=timezone.utc) + tool_version = get_mtr_version() + result = run_mtr(config) + finished_at = datetime.now(tz=timezone.utc) + + return MTRRun( + tool_name=ToolName.MTR, + tool_class=ToolClass.TRACEROUTE, + tool_version=tool_version, + status=Status.SUCCESS, + ip=ip, + started_at=started_at, + finished_at=finished_at, + raw_command=config.to_command_str(), + scan_group_id=scan_group_id or uuid4().hex, + config=config, + parsed=result, + source_ip=get_source_ip(), + facility_id=1, + ) diff --git a/generalresearch/models/network/mtr/parser.py b/generalresearch/models/network/mtr/parser.py new file mode 100644 index 0000000..685eeca --- /dev/null +++ b/generalresearch/models/network/mtr/parser.py @@ -0,0 +1,19 @@ +import json +from typing import Dict + +from generalresearch.models.network.definitions import IPProtocol +from generalresearch.models.network.mtr.result import MTRResult + + +def parse_mtr_output(raw: str, port: int, protocol: IPProtocol) -> MTRResult: + data = parse_mtr_raw_output(raw) + data["port"] = port + data["protocol"] = protocol + return MTRResult.model_validate(data) + + +def parse_mtr_raw_output(raw: str) -> Dict: + data = json.loads(raw)["report"] + data.update(data.pop("mtr")) + data["hops"] = data.pop("hubs") + return data diff --git a/generalresearch/models/network/mtr/result.py b/generalresearch/models/network/mtr/result.py new file mode 100644 index 0000000..5c775b4 --- /dev/null +++ b/generalresearch/models/network/mtr/result.py @@ -0,0 +1,167 @@ +import re +from functools import 
cached_property +from ipaddress import ip_address +from typing import List, Optional + +import tldextract +from pydantic import ( + Field, + field_validator, + BaseModel, + ConfigDict, + model_validator, + computed_field, +) + +from generalresearch.models.network.definitions import IPProtocol, get_ip_kind, IPKind + +HOST_RE = re.compile(r"^(?P<hostname>.+?) \((?P<ip>[^)]+)\)$") + + +class MTRHop(BaseModel): + model_config = ConfigDict(populate_by_name=True) + + hop: int = Field(alias="count") + host: str + asn: Optional[int] = Field(default=None, alias="ASN") + + loss_pct: float = Field(alias="Loss%") + sent: int = Field(alias="Snt") + + last_ms: float = Field(alias="Last") + avg_ms: float = Field(alias="Avg") + best_ms: float = Field(alias="Best") + worst_ms: float = Field(alias="Wrst") + stdev_ms: float = Field(alias="StDev") + + hostname: Optional[str] = Field( + default=None, examples=["fixed-187-191-8-145.totalplay.net"] + ) + ip: Optional[str] = None + + @field_validator("asn", mode="before") + @classmethod + def normalize_asn(cls, v: str): + if v is None or v == "AS???": + return None + if type(v) is int: + return v + return int(v.replace("AS", "")) + + @model_validator(mode="after") + def parse_host(self): + host = self.host.strip() + + # hostname (ip) + m = HOST_RE.match(host) + if m: + self.hostname = m.group("hostname") + self.ip = m.group("ip") + return self + + # ip only + try: + ip_address(host) + self.ip = host + self.hostname = None + return self + except ValueError: + pass + + # hostname only + self.hostname = host + self.ip = None + return self + + @cached_property + def ip_kind(self) -> Optional[IPKind]: + return get_ip_kind(self.ip) + + @cached_property + def icmp_rate_limited(self): + if self.avg_ms == 0: + return False + return self.stdev_ms > self.avg_ms or self.worst_ms > self.best_ms * 10 + + @computed_field(examples=["totalplay.net"]) + @cached_property + def domain(self) -> Optional[str]: + if self.hostname: + return 
tldextract.extract(self.hostname).top_domain_under_public_suffix + + def model_dump_postgres(self, run_id: int): + # Writes for the network_mtrhop table + d = {"mtr_run_id": run_id} + data = self.model_dump( + mode="json", + include={ + "hop", + "ip", + "domain", + "asn", + }, + ) + d.update(data) + return d + + +class MTRResult(BaseModel): + model_config = ConfigDict(populate_by_name=True) + + source: str = Field(description="Hostname of the system running mtr.", alias="src") + destination: str = Field( + description="Destination hostname or IP being traced.", alias="dst" + ) + tos: int = Field(description="IP Type-of-Service (TOS) value used for probes.") + tests: int = Field(description="Number of probes sent per hop.") + psize: int = Field(description="Probe packet size in bytes.") + bitpattern: str = Field(description="Payload byte pattern used in probes (hex).") + + # Protocol used for the traceroute + protocol: IPProtocol = Field(default=IPProtocol.ICMP) + # The target port number for TCP/SCTP/UDP traces + port: Optional[int] = Field(default=None) + + hops: List[MTRHop] = Field() + + def model_dump_postgres(self): + # Writes for the network_mtr table + d = self.model_dump( + mode="json", + include={"port"}, + ) + d["protocol"] = self.protocol.to_number() + d["parsed"] = self.model_dump_json(indent=0) + return d + + def print_report(self) -> None: + print( + f"MTR Report → {self.destination} {self.protocol.name} {self.port or ''}\n" + ) + host_max_len = max(len(h.host) for h in self.hops) + + header = ( + f"{'Hop':>3} " + f"{'Host':<{host_max_len}} " + f"{'Kind':<10} " + f"{'ASN':<8} " + f"{'Loss%':>6} {'Sent':>5} " + f"{'Last':>7} {'Avg':>7} {'Best':>7} {'Worst':>7} {'StDev':>7}" + ) + print(header) + print("-" * len(header)) + + for hop in self.hops: + print( + f"{hop.hop:>3} " + f"{hop.host:<{host_max_len}} " + f"{hop.ip_kind or '???':<10} " + f"{hop.asn or '???':<8} " + f"{hop.loss_pct:6.1f} " + f"{hop.sent:5d} " + f"{hop.last_ms:7.1f} " + 
def build_nmap_command(
    ip: str,
    no_ping: bool = True,
    enable_advanced: bool = True,
    timing: int = 4,
    ports: Optional[str] = None,
    top_ports: Optional[int] = None,
) -> str:
    """Build an nmap CLI string with XML output written to stdout.

    e.g. "nmap -Pn -T4 -A --top-ports 1000 -oX - scanme.nmap.org"
    https://linux.die.net/man/1/nmap

    :param ip: target host/IP, appended as the final argument.
    :param no_ping: add -Pn (skip host discovery).
    :param enable_advanced: add -A (OS/version detection, scripts, traceroute).
    :param timing: -T timing template; must be 0..5.
    :param ports: explicit -p port spec; mutually exclusive with top_ports.
    :param top_ports: --top-ports count; mutually exclusive with ports.
    :raises ValueError: on out-of-range timing or if both ports and
        top_ports are given. (Previously bare asserts, which are stripped
        under ``python -O``.)
    """
    if not 0 <= timing <= 5:
        raise ValueError(f"timing must be in 0..5, got {timing}")
    if ports is not None and top_ports is not None:
        raise ValueError("ports and top_ports are mutually exclusive")

    args = ["nmap", f"-T{timing}"]
    if no_ping:
        args.append("-Pn")
    if enable_advanced:
        args.append("-A")
    if ports is not None:
        args.extend(["-p", ports])
    if top_ports is not None:
        args.extend(["--top-ports", str(top_ports)])
    args.extend(["-oX", "-", ip])  # XML report to stdout
    return " ".join(args)


def run_nmap(config: NmapRunCommand) -> NmapResult:
    """Execute nmap for *config* and parse its XML stdout.

    :raises RuntimeError: when nmap produced no output (missing binary,
        bad flags, ...). Previously an empty stdout fell through to a
        confusing XML parse failure.
    """
    cmd = config.to_command_str()
    # The command is built internally from validated options and never
    # contains quoted arguments, so a plain whitespace split is safe.
    proc = subprocess.run(
        cmd.split(" "),
        capture_output=True,
        text=True,
        check=False,
    )
    raw = proc.stdout.strip()
    if not raw:
        raise RuntimeError(
            f"nmap produced no output (rc={proc.returncode}): {proc.stderr.strip()}"
        )
    return parse_nmap_xml(raw)


def execute_nmap(
    ip: str,
    top_ports: Optional[int] = 1000,
    ports: Optional[str] = None,
    no_ping: bool = True,
    enable_advanced: bool = True,
    timing: int = 4,
    scan_group_id: Optional[UUIDStr] = None,
):
    """Run one nmap scan against *ip* and wrap the parsed report in a NmapRun.

    :raises RuntimeError: when the scan did not finish successfully or the
        parsed report does not match what was requested. (Previously bare
        asserts, which are stripped under ``python -O``.)
    """
    config = NmapRunCommand(
        options=NmapRunCommandOptions(
            top_ports=top_ports,
            ports=ports,
            no_ping=no_ping,
            enable_advanced=enable_advanced,
            timing=timing,
            ip=ip,
        )
    )
    result = run_nmap(config)

    # Sanity-check that the report corresponds to the requested scan.
    if result.exit_status != "success":
        raise RuntimeError(f"nmap scan did not succeed: {result.exit_status=}")
    if result.target_ip != ip:
        raise RuntimeError(f"target mismatch: {result.target_ip=}, {ip=}")
    if result.command_line != config.to_command_str():
        raise RuntimeError(f"command mismatch: {result.command_line=}")

    return NmapRun(
        tool_name=ToolName.NMAP,
        tool_class=ToolClass.PORT_SCAN,
        tool_version=result.version,
        status=Status.SUCCESS,
        ip=ip,
        started_at=result.started_at,
        finished_at=result.finished_at,
        raw_command=result.command_line,
        scan_group_id=scan_group_id or uuid4().hex,
        config=config,
        parsed=result,
    )


class NmapParserException(Exception):
    """Raised when nmap XML output cannot be parsed."""

    def __init__(self, msg):
        self.msg = msg

    def __str__(self):
        return self.msg


class NmapXmlParser:
    """
    Example: https://nmap.org/book/output-formats-xml-output.html
    Full DTD: https://nmap.org/book/nmap-dtd.html
    """

    @classmethod
    def parse_xml(cls, nmap_data: str) -> NmapResult:
        """Parse a full single-host nmap XML report into a NmapResult.

        :raises NmapParserException: on malformed XML or an unexpected root tag.
        """
        try:
            root = ET.fromstring(nmap_data)
        except Exception as e:
            raise NmapParserException(
                f"Wrong XML structure: cannot parse data: {e}"
            ) from e

        if root.tag != "nmaprun":
            # Fixed typo ("Unpexpected") and the accidental implicit
            # string-concatenation split in the original message.
            raise NmapParserException("Unexpected data structure for XML root node")
        return cls._parse_xml_nmaprun(root)
+ + :param root: Element from xml.ElementTree (top of XML the document) + """ + cls._validate_nmap_root(root) + host_count = len(root.findall(".//host")) + assert host_count == 1, f"Expected 1 host, got {host_count}" + + xml_str = ET.tostring(root, encoding="unicode").replace("\n", "") + nmap_data = {"raw_xml": xml_str} + nmap_data.update(cls._parse_nmaprun(root)) + + nmap_data["scan_infos"] = [ + cls._parse_scaninfo(scaninfo_el) + for scaninfo_el in root.findall(".//scaninfo") + ] + + nmap_data.update(cls._parse_runstats(root)) + + nmap_data.update(cls._parse_xml_host(root.find(".//host"))) + + return NmapResult.model_validate(nmap_data) + + @classmethod + def _validate_nmap_root(cls, root: ET.Element) -> None: + allowed = { + "scaninfo", + "host", + "runstats", + "verbose", + "debugging", + "taskprogress", + } + + found = {child.tag for child in root} + unexpected = found - allowed + if unexpected: + raise ValueError( + f"Unexpected top-level tags in nmap XML: {sorted(unexpected)}" + ) + + @classmethod + def _parse_scaninfo(cls, scaninfo_el: ET.Element) -> NmapScanInfo: + data = dict() + data["type"] = NmapScanType(scaninfo_el.attrib["type"]) + data["protocol"] = IPProtocol(scaninfo_el.attrib["protocol"]) + data["num_services"] = scaninfo_el.attrib["numservices"] + data["services"] = scaninfo_el.attrib["services"] + return NmapScanInfo.model_validate(data) + + @classmethod + def _parse_runstats(cls, root: ET.Element) -> Dict: + runstats = root.find("runstats") + if runstats is None: + return {} + + finished = runstats.find("finished") + if finished is None: + return {} + + finished_at = None + ts = finished.attrib.get("time") + if ts: + finished_at = datetime.fromtimestamp(int(ts), tz=timezone.utc) + + return { + "finished_at": finished_at, + "exit_status": finished.attrib.get("exit"), + } + + @classmethod + def _parse_nmaprun(cls, nmaprun_el: ET.Element) -> Dict: + nmap_data = dict() + nmaprun = dict(nmaprun_el.attrib) + nmap_data["command_line"] = 
nmaprun["args"] + nmap_data["started_at"] = datetime.fromtimestamp( + float(nmaprun["start"]), tz=timezone.utc + ) + nmap_data["version"] = nmaprun["version"] + nmap_data["xmloutputversion"] = nmaprun["xmloutputversion"] + return nmap_data + + @classmethod + def _parse_xml_host(cls, host_el: ET.Element) -> Dict: + """ + Receives a <host> XML tag representing a scanned host with + its services. + """ + data = dict() + + # <status state="up" reason="user-set" reason_ttl="0"/> + status_el = host_el.find("status") + data["host_state"] = NmapHostState(status_el.attrib["state"]) + data["host_state_reason"] = NmapHostStatusReason(status_el.attrib["reason"]) + host_state_reason_ttl = status_el.attrib.get("reason_ttl") + if host_state_reason_ttl: + data["host_state_reason_ttl"] = int(host_state_reason_ttl) + + # <address addr="108.171.53.1" addrtype="ipv4"/> + address_el = host_el.find("address") + data["target_ip"] = address_el.attrib["addr"] + + data["hostnames"] = cls._parse_hostnames(host_el.find("hostnames")) + + data["ports"], data["port_stats"] = cls._parse_xml_ports(host_el.find("ports")) + + uptime = host_el.find("uptime") + if uptime is not None: + data["uptime_seconds"] = int(uptime.attrib["seconds"]) + + distance = host_el.find("distance") + if distance is not None: + data["distance"] = int(distance.attrib["value"]) + + tcpsequence = host_el.find("tcpsequence") + if tcpsequence is not None: + data["tcp_sequence_index"] = int(tcpsequence.attrib["index"]) + data["tcp_sequence_difficulty"] = tcpsequence.attrib["difficulty"] + ipidsequence = host_el.find("ipidsequence") + if ipidsequence is not None: + data["ipid_sequence_class"] = ipidsequence.attrib["class"] + tcptssequence = host_el.find("tcptssequence") + if tcptssequence is not None: + data["tcp_timestamp_class"] = tcptssequence.attrib["class"] + + times_elem = host_el.find("times") + if times_elem is not None: + data.update( + { + "srtt_us": int(times_elem.attrib.get("srtt", 0)) or None, + "rttvar_us": 
int(times_elem.attrib.get("rttvar", 0)) or None, + "timeout_us": int(times_elem.attrib.get("to", 0)) or None, + } + ) + + hostscripts_el = host_el.find("hostscript") + if hostscripts_el is not None: + data["host_scripts"] = [ + NmapHostScript(id=el.attrib["id"], output=el.attrib.get("output")) + for el in hostscripts_el.findall("script") + ] + + data["os_matches"] = cls._parse_os_matches(host_el) + + data["trace"] = cls._parse_trace(host_el) + + return data + + @classmethod + def _parse_os_matches(cls, host_el: ET.Element) -> List[NmapOSMatch] | None: + os_elem = host_el.find("os") + if os_elem is None: + return None + + matches: List[NmapOSMatch] = [] + + for m in os_elem.findall("osmatch"): + classes: List[NmapOSClass] = [] + + for c in m.findall("osclass"): + cpes = [e.text.strip() for e in c.findall("cpe") if e.text] + + classes.append( + NmapOSClass( + vendor=c.attrib.get("vendor"), + osfamily=c.attrib.get("osfamily"), + osgen=c.attrib.get("osgen"), + accuracy=( + int(c.attrib["accuracy"]) + if "accuracy" in c.attrib + else None + ), + cpe=cpes or None, + ) + ) + + matches.append( + NmapOSMatch( + name=m.attrib["name"], + accuracy=int(m.attrib["accuracy"]), + classes=classes, + ) + ) + + return matches or None + + @classmethod + def _parse_hostnames(cls, hostnames_el: ET.Element) -> List[NmapHostname]: + """ + Parses the hostnames element. + e.g. <hostnames> + <hostname name="108-171-53-1.aceips.com" type="PTR"/> + </hostnames> + """ + return [ + cls._parse_hostname(hname) for hname in hostnames_el.findall("hostname") + ] + + @classmethod + def _parse_hostname(cls, hostname_el: ET.Element) -> NmapHostname: + """ + Parses the hostname element. + e.g. 
<hostname name="108-171-53-1.aceips.com" type="PTR"/> + + :param hostname_el: <hostname> XML tag from a nmap scan + """ + return NmapHostname.model_validate(dict(hostname_el.attrib)) + + @classmethod + def _parse_xml_ports( + cls, ports_elem: ET.Element + ) -> Tuple[List[NmapPort], NmapPortStats]: + """ + Parses the list of scanned services from a targeted host. + """ + ports: List[NmapPort] = [] + stats = NmapPortStats() + + # handle extraports first + for e in ports_elem.findall("extraports"): + state = PortState(e.attrib["state"]) + count = int(e.attrib["count"]) + + key = state.value.replace("|", "_") + setattr(stats, key, getattr(stats, key) + count) + + for port_elem in ports_elem.findall("port"): + port = cls._parse_xml_port(port_elem) + ports.append(port) + key = port.state.value.replace("|", "_") + setattr(stats, key, getattr(stats, key) + 1) + return ports, stats + + @classmethod + def _parse_xml_service(cls, service_elem: ET.Element) -> NmapService: + svc = { + "name": service_elem.attrib.get("name"), + "product": service_elem.attrib.get("product"), + "version": service_elem.attrib.get("version"), + "extrainfo": service_elem.attrib.get("extrainfo"), + "method": service_elem.attrib.get("method"), + "conf": ( + int(service_elem.attrib["conf"]) + if "conf" in service_elem.attrib + else None + ), + "cpe": [e.text.strip() for e in service_elem.findall("cpe")], + } + + return NmapService.model_validate(svc) + + @classmethod + def _parse_xml_script(cls, script_elem: ET.Element) -> NmapScript: + output = script_elem.attrib.get("output") + if output: + output = output.strip() + script = { + "id": script_elem.attrib["id"], + "output": output, + } + + elements: Dict[str, Any] = {} + + # handle <elem key="...">value</elem> + for elem in script_elem.findall(".//elem"): + key = elem.attrib.get("key") + if key: + elements[key.strip()] = elem.text.strip() + + script["elements"] = elements + return NmapScript.model_validate(script) + + @classmethod + def 
_parse_xml_port(cls, port_elem: ET.Element) -> NmapPort: + """ + <port protocol="tcp" portid="61232"> + <state state="open" reason="syn-ack" reason_ttl="47"/> + <service name="socks5" extrainfo="Username/password authentication required" method="probed" conf="10"/> + <script id="socks-auth-info" output="
 Username and password"> + <table> + <elem key="name">Username and password</elem> + <elem key="method">2</elem> + </table> + </script> + </port> + """ + state_elem = port_elem.find("state") + + port = { + "port": int(port_elem.attrib["portid"]), + "protocol": port_elem.attrib["protocol"], + "state": PortState(state_elem.attrib["state"]), + "reason": ( + PortStateReason(state_elem.attrib["reason"]) + if "reason" in state_elem.attrib + else None + ), + "reason_ttl": ( + int(state_elem.attrib["reason_ttl"]) + if "reason_ttl" in state_elem.attrib + else None + ), + } + + service_elem = port_elem.find("service") + if service_elem is not None: + port["service"] = cls._parse_xml_service(service_elem) + + port["scripts"] = [] + for script_elem in port_elem.findall("script"): + port["scripts"].append(cls._parse_xml_script(script_elem)) + + return NmapPort.model_validate(port) + + @classmethod + def _parse_trace(cls, host_elem: ET.Element) -> Optional[NmapTrace]: + trace_elem = host_elem.find("trace") + if trace_elem is None: + return None + + port_attr = trace_elem.attrib.get("port") + proto_attr = trace_elem.attrib.get("proto") + + hops: List[NmapTraceHop] = [] + + for hop_elem in trace_elem.findall("hop"): + ttl = hop_elem.attrib.get("ttl") + if ttl is None: + continue # ttl is required by the DTD but guard anyway + + rtt = hop_elem.attrib.get("rtt") + ipaddr = hop_elem.attrib.get("ipaddr") + host = hop_elem.attrib.get("host") + + hops.append( + NmapTraceHop( + ttl=int(ttl), + ipaddr=ipaddr, + rtt_ms=float(rtt) if rtt is not None else None, + host=host, + ) + ) + + return NmapTrace( + port=int(port_attr) if port_attr is not None else None, + protocol=IPProtocol(proto_attr) if proto_attr is not None else None, + hops=hops, + ) + + +def parse_nmap_xml(raw): + return NmapXmlParser.parse_xml(raw) diff --git a/generalresearch/models/network/nmap/result.py b/generalresearch/models/network/nmap/result.py new file mode 100644 index 0000000..3ff2376 --- /dev/null +++ 
b/generalresearch/models/network/nmap/result.py @@ -0,0 +1,432 @@ +import json +from datetime import timedelta +from enum import StrEnum +from functools import cached_property +from typing import Dict, Any, Literal, List, Optional, Tuple, Set + +from pydantic import computed_field, BaseModel, Field + +from generalresearch.models.custom_types import AwareDatetimeISO, IPvAnyAddressStr +from generalresearch.models.network.definitions import IPProtocol + + +class PortState(StrEnum): + OPEN = "open" + CLOSED = "closed" + FILTERED = "filtered" + UNFILTERED = "unfiltered" + OPEN_FILTERED = "open|filtered" + CLOSED_FILTERED = "closed|filtered" + # Added by me, does not get returned. Used for book-keeping + NOT_SCANNED = "not_scanned" + + +class PortStateReason(StrEnum): + SYN_ACK = "syn-ack" + RESET = "reset" + CONN_REFUSED = "conn-refused" + NO_RESPONSE = "no-response" + SYN = "syn" + FIN = "fin" + + ICMP_NET_UNREACH = "net-unreach" + ICMP_HOST_UNREACH = "host-unreach" + ICMP_PROTO_UNREACH = "proto-unreach" + ICMP_PORT_UNREACH = "port-unreach" + + ADMIN_PROHIBITED = "admin-prohibited" + HOST_PROHIBITED = "host-prohibited" + NET_PROHIBITED = "net-prohibited" + + ECHO_REPLY = "echo-reply" + TIME_EXCEEDED = "time-exceeded" + + +class NmapScanType(StrEnum): + SYN = "syn" + CONNECT = "connect" + ACK = "ack" + WINDOW = "window" + MAIMON = "maimon" + FIN = "fin" + NULL = "null" + XMAS = "xmas" + UDP = "udp" + SCTP_INIT = "sctpinit" + SCTP_COOKIE_ECHO = "sctpcookieecho" + + +class NmapHostState(StrEnum): + UP = "up" + DOWN = "down" + UNKNOWN = "unknown" + + +class NmapHostStatusReason(StrEnum): + USER_SET = "user-set" + SYN_ACK = "syn-ack" + RESET = "reset" + ECHO_REPLY = "echo-reply" + ARP_RESPONSE = "arp-response" + NO_RESPONSE = "no-response" + NET_UNREACH = "net-unreach" + HOST_UNREACH = "host-unreach" + PROTO_UNREACH = "proto-unreach" + PORT_UNREACH = "port-unreach" + ADMIN_PROHIBITED = "admin-prohibited" + LOCALHOST_RESPONSE = "localhost-response" + + +class 
class NmapOSClass(BaseModel):
    """One <osclass> OS-fingerprint class inside an <osmatch>.

    Fix: vendor/osfamily/accuracy previously declared non-optional types
    (``str``/``int``) with a ``None`` default, contradicting the parser
    which supplies ``attrib.get(...)`` values that may legitimately be
    None. All fields are now Optional with a None default.
    """

    vendor: Optional[str] = None
    osfamily: Optional[str] = None
    osgen: Optional[str] = None
    accuracy: Optional[int] = None
    cpe: Optional[List[str]] = None


class NmapOSMatch(BaseModel):
    """One <osmatch> candidate with its OS classes."""

    name: str
    accuracy: int
    classes: List[NmapOSClass] = Field(default_factory=list)

    @property
    def best_class(self) -> Optional[NmapOSClass]:
        """The highest-accuracy class, or None when there are none."""
        if not self.classes:
            return None
        return max(self.classes, key=lambda m: m.accuracy)
 Username and password"> + <table> + <elem key="name">Username and password</elem> + <elem key="method">2</elem> + </table> + </script> + """ + + id: str + output: Optional[str] = None + elements: Dict[str, Any] = Field(default_factory=dict) + + +class NmapService(BaseModel): + # <service name="socks5" extrainfo="Username/password authentication required" method="probed" conf="10"/> + name: Optional[str] = None + product: Optional[str] = None + version: Optional[str] = None + extrainfo: Optional[str] = None + method: Optional[str] = None + conf: Optional[int] = None + cpe: List[str] = Field(default_factory=list) + + def model_dump_postgres(self): + d = self.model_dump(mode="json") + d["service_name"] = self.name + return d + + +class NmapPort(BaseModel): + port: int = Field() + protocol: IPProtocol = Field() + # Closed ports will not have a NmapPort record + state: PortState = Field() + reason: Optional[PortStateReason] = Field(default=None) + reason_ttl: Optional[int] = Field(default=None) + + service: Optional[NmapService] = None + scripts: List[NmapScript] = Field(default_factory=list) + + def model_dump_postgres(self, run_id: int): + # Writes for the network_portscanport table + d = {"port_scan_id": run_id} + data = self.model_dump( + mode="json", + include={ + "port", + "state", + "reason", + "reason_ttl", + }, + ) + d.update(data) + d["protocol"] = self.protocol.to_number() + if self.service: + d.update(self.service.model_dump_postgres()) + return d + + +class NmapHostScript(BaseModel): + id: str = Field() + output: Optional[str] = Field(default=None) + + +class NmapTraceHop(BaseModel): + """ + One hop observed during Nmap's traceroute. 
+ + Example XML: + <hop ttl="7" ipaddr="62.115.192.20" rtt="17.17" host="gdl-b2-link.ip.twelve99.net"/> + """ + + ttl: int = Field() + + ipaddr: Optional[str] = Field( + default=None, + description="IP address of the responding router or host", + ) + + rtt_ms: Optional[float] = Field( + default=None, + description="Round-trip time in milliseconds for the probe reaching this hop.", + ) + + host: Optional[str] = Field( + default=None, + description="Reverse DNS hostname for the hop if Nmap resolved one.", + ) + + +class NmapTrace(BaseModel): + """ + Traceroute information collected by Nmap. + + Nmap performs a single traceroute per host using probes matching the scan + type (typically TCP) directed at a chosen destination port. + + Example XML: + <trace port="61232" proto="tcp"> + <hop ttl="1" ipaddr="192.168.86.1" rtt="3.83"/> + ... + </trace> + """ + + port: Optional[int] = Field( + default=None, + description="Destination port used for traceroute probes (may be absent depending on scan type).", + ) + protocol: Optional[IPProtocol] = Field( + default=None, + description="Transport protocol used for the traceroute probes (tcp, udp, etc.).", + ) + + hops: List[NmapTraceHop] = Field( + default_factory=list, + description="Ordered list of hops observed during the traceroute.", + ) + + @property + def destination(self) -> Optional[NmapTraceHop]: + return self.hops[-1] if self.hops else None + + +class NmapHostname(BaseModel): + # <hostname name="108-171-53-1.aceips.com" type="PTR"/> + name: str + type: Optional[Literal["PTR", "user"]] = None + + +class NmapPortStats(BaseModel): + """ + This is counts across all protocols scanned (tcp/udp) + """ + + open: int = 0 + closed: int = 0 + filtered: int = 0 + unfiltered: int = 0 + open_filtered: int = 0 + closed_filtered: int = 0 + + +class NmapScanInfo(BaseModel): + """ + We could have multiple protocols in one run. 
class NmapScanInfo(BaseModel):
    """
    One <scaninfo> per scanned protocol; a single run may carry several:

    <scaninfo type="syn" protocol="tcp" numservices="983" services="22-1000,1100,3389,11000,61232"/>
    <scaninfo type="syn" protocol="udp" numservices="983" services="1100"/>
    """

    type: NmapScanType = Field()
    protocol: IPProtocol = Field()
    num_services: int = Field()
    services: str = Field()

    @cached_property
    def port_set(self) -> Set[int]:
        """
        Expand the nmap services string into the set of scanned ports.
        e.g. "22-25,80,443" -> {22, 23, 24, 25, 80, 443}
        """
        expanded: Set[int] = set()
        for token in self.services.split(","):
            lo, sep, hi = token.partition("-")
            if sep:
                expanded.update(range(int(lo), int(hi) + 1))
            else:
                expanded.add(int(lo))
        return expanded


class NmapResult(BaseModel):
    """
    One parsed nmap run. Assumes exactly ONE host was scanned.
    """

    command_line: str = Field()
    started_at: AwareDatetimeISO = Field()
    version: str = Field()
    xmloutputversion: Literal["1.04"] = Field()

    scan_infos: List[NmapScanInfo] = Field(min_length=1)

    # From <runstats>.
    finished_at: Optional[AwareDatetimeISO] = Field(default=None)
    exit_status: Optional[Literal["success", "error"]] = Field(default=None)

    #####
    # Everything below here comes from within the *single* scanned host.
    #####

    # <status state="up" reason="user-set" reason_ttl="0"/>
    host_state: NmapHostState = Field()
    host_state_reason: NmapHostStatusReason = Field()
    host_state_reason_ttl: Optional[int] = None

    # <address addr="108.171.53.1" addrtype="ipv4"/>
    target_ip: IPvAnyAddressStr = Field()

    hostnames: List[NmapHostname] = Field()

    ports: List[NmapPort] = []
    port_stats: NmapPortStats = Field()

    # <uptime seconds="4063775" lastboot="Fri Jan 16 12:12:06 2026"/>
    uptime_seconds: Optional[int] = Field(default=None)
    # <distance value="11"/>
    distance: Optional[int] = Field(description="approx number of hops", default=None)

    # <tcpsequence index="263" difficulty="Good luck!">
    tcp_sequence_index: Optional[int] = None
    tcp_sequence_difficulty: Optional[str] = None
    # <ipidsequence class="All zeros">
    ipid_sequence_class: Optional[str] = None
    # <tcptssequence class="1000HZ">
    tcp_timestamp_class: Optional[str] = None

    # <times srtt="54719" rttvar="23423" to="148411"/>
    srtt_us: Optional[int] = Field(
        default=None, description="smoothed RTT estimate (microseconds µs)"
    )
    rttvar_us: Optional[int] = Field(
        default=None, description="RTT variance (microseconds µs)"
    )
    timeout_us: Optional[int] = Field(
        default=None, description="probe timeout (microseconds µs)"
    )

    os_matches: Optional[List[NmapOSMatch]] = Field(default=None)
    host_scripts: List[NmapHostScript] = Field(default_factory=list)
    trace: Optional[NmapTrace] = Field(default=None)
    raw_xml: Optional[str] = None

    @computed_field
    @property
    def last_boot(self) -> Optional[AwareDatetimeISO]:
        """Boot time derived from uptime; None when uptime is absent/zero."""
        if not self.uptime_seconds:
            return None
        return self.started_at - timedelta(seconds=self.uptime_seconds)

    @property
    def scan_info_tcp(self):
        return next((s for s in self.scan_infos if s.protocol == IPProtocol.TCP), None)

    @property
    def scan_info_udp(self):
        return next((s for s in self.scan_infos if s.protocol == IPProtocol.UDP), None)

    @property
    def latency_ms(self) -> Optional[float]:
        # srtt is reported by nmap in microseconds.
        if self.srtt_us is None:
            return None
        return self.srtt_us / 1000

    @property
    def best_os_match(self) -> Optional[NmapOSMatch]:
        if not self.os_matches:
            return None
        return max(self.os_matches, key=lambda m: m.accuracy)

    def filter_ports(self, protocol: IPProtocol, state: PortState) -> List[NmapPort]:
        """Ports matching both *protocol* and *state*."""
        return [p for p in self.ports if p.protocol == protocol and p.state == state]

    @property
    def tcp_open_ports(self) -> List[int]:
        """Open TCP port numbers."""
        return [p.port for p in self.filter_ports(IPProtocol.TCP, PortState.OPEN)]

    @property
    def udp_open_ports(self) -> List[int]:
        """Open UDP port numbers."""
        return [p.port for p in self.filter_ports(IPProtocol.UDP, PortState.OPEN)]

    @cached_property
    def _port_index(self) -> Dict[Tuple[IPProtocol, int], NmapPort]:
        return {(p.protocol, p.port): p for p in self.ports}

    def get_port_state(
        self, port: int, protocol: IPProtocol = IPProtocol.TCP
    ) -> PortState:
        """
        Effective state of one port: an explicit record wins; otherwise
        CLOSED if it was in the scanned set, NOT_SCANNED if never probed.
        """
        hit = self._port_index.get((protocol, port))
        if hit is not None:
            return hit.state

        scaninfo = next((s for s in self.scan_infos if s.protocol == protocol), None)
        if scaninfo and port in scaninfo.port_set:
            return PortState.CLOSED
        return PortState.NOT_SCANNED

    def model_dump_postgres(self):
        """Row for the network_portscan table."""
        row = self.model_dump(
            mode="json",
            include={
                "started_at",
                "host_state",
                "host_state_reason",
                "distance",
                "uptime_seconds",
                "raw_xml",
            },
        )
        row["ip"] = self.target_ip
        row["xml_version"] = self.xmloutputversion
        row["latency_ms"] = self.latency_ms
        row["last_boot"] = self.last_boot
        row["parsed"] = self.model_dump_json(indent=0)
        row["open_tcp_ports"] = json.dumps(self.tcp_open_ports)
        row["open_udp_ports"] = json.dumps(self.udp_open_ports)
        return row
def build_rdns_command(ip: str) -> str:
    """Build the reverse-lookup command, e.g. "dig +noall +answer -x 1.2.3.4"."""
    return " ".join(["dig", "+noall", "+answer", "-x", ip])


def run_rdns(config: RDNSRunCommand) -> RDNSResult:
    """Run `dig -x` for the configured IP and parse the PTR answers."""
    argv = config.to_command_str().split(" ")
    proc = subprocess.run(
        argv,
        capture_output=True,
        text=True,
        check=False,
    )
    return parse_rdns_output(ip=config.options.ip, raw=proc.stdout.strip())


def get_dig_version() -> str:
    """
    Return the dig version number.

    dig prints its banner on stderr, e.g.
    "DiG 9.18.39-0ubuntu0.22.04.2-Ubuntu" -> "9.18.39".
    """
    proc = subprocess.run(
        ["dig", "-v"],
        capture_output=True,
        text=True,
        check=False,
    )
    banner = proc.stderr.strip()
    return banner.split("-", 1)[0].split(" ", 1)[1]
def execute_rdns(ip: str, scan_group_id: Optional[UUIDStr] = None):
    """Run one reverse-DNS lookup for *ip* and wrap it in an RDNSRun."""
    started_at = datetime.now(tz=timezone.utc)
    tool_version = get_dig_version()
    config = RDNSRunCommand(options=RDNSRunCommandOptions(ip=ip))
    result = run_rdns(config)
    finished_at = datetime.now(tz=timezone.utc)

    return RDNSRun(
        tool_name=ToolName.DIG,
        tool_class=ToolClass.RDNS,
        tool_version=tool_version,
        status=Status.SUCCESS,
        ip=ip,
        started_at=started_at,
        finished_at=finished_at,
        raw_command=config.to_command_str(),
        scan_group_id=scan_group_id or uuid4().hex,
        config=config,
        parsed=result,
    )


# Matches the PTR target of a dig answer line; the group stops before the
# trailing root dot, so the stored hostname has no trailing ".".
PTR_RE = re.compile(r"\sPTR\s+([^\s]+)\.")


def parse_rdns_output(ip: IPvAnyAddressStr, raw: str):
    """Extract PTR hostnames from raw ``dig +noall +answer -x`` output."""
    matches = (PTR_RE.search(line) for line in raw.splitlines())
    hostnames: List[str] = [m.group(1) for m in matches if m]

    return RDNSResult(
        ip=ipaddress.ip_address(ip),
        hostnames=hostnames,
    )
self.hostnames: + return self.hostnames[0] + + @computed_field(examples=[1]) + @cached_property + def hostname_count(self) -> int: + return len(self.hostnames) + + @computed_field(examples=["totalplay.net"]) + @cached_property + def primary_domain(self) -> Optional[str]: + if self.primary_hostname: + return tldextract.extract( + self.primary_hostname + ).top_domain_under_public_suffix + + def model_dump_postgres(self): + # Writes for the network_rdnsresult table + d = self.model_dump( + mode="json", + include={"primary_hostname", "primary_domain", "hostname_count"}, + ) + d["hostnames"] = json.dumps(self.hostnames) + return d diff --git a/generalresearch/models/network/tool_run.py b/generalresearch/models/network/tool_run.py new file mode 100644 index 0000000..114d4b6 --- /dev/null +++ b/generalresearch/models/network/tool_run.py @@ -0,0 +1,116 @@ +from enum import StrEnum +from typing import Optional, Literal +from uuid import uuid4 + +from pydantic import BaseModel, Field, PositiveInt + +from generalresearch.models.custom_types import ( + AwareDatetimeISO, + IPvAnyAddressStr, + UUIDStr, +) +from generalresearch.models.network.mtr.result import MTRResult +from generalresearch.models.network.nmap.result import NmapResult +from generalresearch.models.network.rdns.result import RDNSResult +from generalresearch.models.network.tool_run_command import ( + ToolRunCommand, + NmapRunCommand, + RDNSRunCommand, + MTRRunCommand, +) + + +class ToolClass(StrEnum): + PORT_SCAN = "port_scan" + RDNS = "rdns" + PING = "ping" + TRACEROUTE = "traceroute" + + +class ToolName(StrEnum): + NMAP = "nmap" + RUSTMAP = "rustmap" + DIG = "dig" + PING = "ping" + TRACEROUTE = "traceroute" + MTR = "mtr" + + +class Status(StrEnum): + SUCCESS = "success" + FAILED = "failed" + TIMEOUT = "timeout" + ERROR = "error" + + +class ToolRun(BaseModel): + """ + A run of a networking tool against one host/ip. 
class ToolRun(BaseModel):
    """
    One execution of a networking tool against a single host/ip.
    """

    id: Optional[PositiveInt] = Field(default=None)

    ip: IPvAnyAddressStr = Field()
    scan_group_id: UUIDStr = Field(default_factory=lambda: uuid4().hex)
    tool_class: ToolClass = Field()
    tool_name: ToolName = Field()
    tool_version: str = Field()

    started_at: AwareDatetimeISO = Field()
    finished_at: Optional[AwareDatetimeISO] = Field(default=None)
    status: Optional[Status] = Field(default=None)

    raw_command: str = Field()

    config: ToolRunCommand = Field()

    def model_dump_postgres(self):
        """JSON-mode dump with the config serialized to a JSON string."""
        row = self.model_dump(mode="json", exclude={"config"})
        row["config"] = self.config.model_dump_json()
        return row


class NmapRun(ToolRun):
    """Port-scan run: nmap config plus the parsed NmapResult."""

    tool_class: Literal[ToolClass.PORT_SCAN] = Field(default=ToolClass.PORT_SCAN)
    tool_name: Literal[ToolName.NMAP] = Field(default=ToolName.NMAP)
    config: NmapRunCommand = Field()

    parsed: NmapResult = Field()

    def model_dump_postgres(self):
        row = super().model_dump_postgres()
        row["run_id"] = self.id
        row.update(self.parsed.model_dump_postgres())
        return row


class RDNSRun(ToolRun):
    """Reverse-DNS run: dig config plus the parsed RDNSResult."""

    tool_class: Literal[ToolClass.RDNS] = Field(default=ToolClass.RDNS)
    tool_name: Literal[ToolName.DIG] = Field(default=ToolName.DIG)
    config: RDNSRunCommand = Field()

    parsed: RDNSResult = Field()

    def model_dump_postgres(self):
        row = super().model_dump_postgres()
        row["run_id"] = self.id
        row.update(self.parsed.model_dump_postgres())
        return row


class MTRRun(ToolRun):
    """Traceroute run: mtr config plus the parsed MTRResult."""

    tool_class: Literal[ToolClass.TRACEROUTE] = Field(default=ToolClass.TRACEROUTE)
    tool_name: Literal[ToolName.MTR] = Field(default=ToolName.MTR)
    config: MTRRunCommand = Field()

    facility_id: int = Field(default=1)
    source_ip: IPvAnyAddressStr = Field()
    parsed: MTRResult = Field()

    def model_dump_postgres(self):
        row = super().model_dump_postgres()
        row["run_id"] = self.id
        row["source_ip"] = self.source_ip
        row["facility_id"] = self.facility_id
        row.update(self.parsed.model_dump_postgres())
        return row


class ToolRunCommand(BaseModel):
    """Serializable description of a CLI invocation (command + options)."""

    command: str = Field()
    options: Dict[str, Optional[str | int]] = Field(default_factory=dict)


class NmapRunCommandOptions(BaseModel):
    """Options mirroring build_nmap_command's keyword arguments."""

    ip: IPvAnyAddressStr
    top_ports: Optional[int] = Field(default=1000)
    ports: Optional[str] = Field(default=None)
    no_ping: bool = Field(default=True)
    enable_advanced: bool = Field(default=True)
    timing: int = Field(default=4)


class NmapRunCommand(ToolRunCommand):
    command: Literal["nmap"] = Field(default="nmap")
    options: NmapRunCommandOptions = Field()

    def to_command_str(self):
        # Imported lazily to avoid a circular import with the command module.
        from generalresearch.models.network.nmap.command import build_nmap_command

        return build_nmap_command(**self.options.model_dump())


class RDNSRunCommandOptions(BaseModel):
    ip: IPvAnyAddressStr


class RDNSRunCommand(ToolRunCommand):
    command: Literal["dig"] = Field(default="dig")
    options: RDNSRunCommandOptions = Field()

    def to_command_str(self):
        # Imported lazily to avoid a circular import with the command module.
        from generalresearch.models.network.rdns.command import build_rdns_command

        return build_rdns_command(**self.options.model_dump())


class MTRRunCommandOptions(BaseModel):
    ip: IPvAnyAddressStr = Field()
    protocol: IPProtocol = Field(default=IPProtocol.ICMP)
    port: Optional[int] = Field(default=None)
    report_cycles: int = Field(default=10)


class MTRRunCommand(ToolRunCommand):
    command: Literal["mtr"] = Field(default="mtr")
    options: MTRRunCommandOptions = Field()

    def to_command_str(self):
        # Imported lazily to avoid a circular import with the command module.
        from generalresearch.models.network.mtr.command import build_mtr_command

        return build_mtr_command(**self.options.model_dump())
class CIDRField(models.Field):
    """Django model field backed by PostgreSQL's native ``cidr`` column type.

    Python-side values are ``ipaddress.IPv4Network`` / ``IPv6Network``
    objects; they are stored as their canonical string representation.
    """

    description = "PostgreSQL CIDR network"

    def db_type(self, connection):
        # PostgreSQL-only: "cidr" is not portable to other backends.
        return "cidr"

    def from_db_value(self, value, expression, connection):
        """Convert values loaded from the database into network objects.

        Without this hook Django hands back whatever the driver returns
        (typically a string) — ``to_python`` alone is only used for
        deserialization/forms, not for query results.
        """
        if value is None:
            return value
        return self.to_python(value)

    def to_python(self, value):
        """Coerce *value* into an ip_network; idempotent, None passes through."""
        if value is None or isinstance(
            value, (ipaddress.IPv4Network, ipaddress.IPv6Network)
        ):
            return value
        # strict=False tolerates host bits being set (e.g. "10.0.0.1/24").
        return ipaddress.ip_network(value, strict=False)

    def get_prep_value(self, value):
        """Serialize to the canonical CIDR string for query parameters."""
        if value is None:
            return None
        return str(ipaddress.ip_network(value, strict=False))
class RDNSResult(models.Model):
    """Parsed reverse-DNS lookup output, 1:1 with its ToolRun row."""

    # Shares its PK with the owning ToolRun (the run IS the identity).
    run = models.OneToOneField(
        ToolRun,
        on_delete=models.CASCADE,
        related_name="rdns",
        primary_key=True,
    )

    # denormalized from ToolRun for query speed
    ip = models.GenericIPAddressField()
    started_at = models.DateTimeField()
    scan_group_id = models.UUIDField()

    # Null when the lookup returned no PTR record. primary_domain is
    # presumably the registrable domain of primary_hostname — confirm
    # against the rdns parser.
    primary_hostname = models.CharField(max_length=255, null=True)
    primary_domain = models.CharField(max_length=255, null=True)
    hostname_count = models.PositiveIntegerField(default=0)
    # All hostnames returned, as a JSON list.
    hostnames = models.JSONField(default=list)

    class Meta:
        db_table = "network_rdnsresult"
        indexes = [
            # "latest result per IP" lookups.
            models.Index(fields=["ip", "-started_at"]),
            models.Index(fields=["scan_group_id"]),
            models.Index(fields=["primary_hostname"]),
            models.Index(fields=["primary_domain"]),
        ]
class PortScanPort(models.Model):
    """One (protocol, port) observation belonging to a PortScan."""

    id = models.BigAutoField(primary_key=True)
    port_scan = models.ForeignKey(
        PortScan,
        on_delete=models.CASCADE,
        related_name="ports",
    )

    # https://www.iana.org/assignments/protocol-numbers/protocol-numbers.xhtml
    protocol = models.PositiveSmallIntegerField(default=6)
    # nullable b/c ICMP doesn't use ports
    port = models.PositiveIntegerField(null=True)

    # nmap port state, e.g. "open" / "closed" / "filtered".
    state = models.CharField(max_length=20)

    # Why nmap decided on that state (e.g. "syn-ack", "no-response"),
    # plus the TTL of the deciding response.
    reason = models.CharField(max_length=32, null=True)
    reason_ttl = models.IntegerField(null=True)

    service_name = models.CharField(max_length=64, null=True)

    class Meta:
        db_table = "network_portscanport"
        constraints = [
            # NOTE(review): with port NULL (ICMP) Postgres treats NULLs as
            # distinct, so this constraint does not dedupe port-less rows.
            models.UniqueConstraint(
                fields=["port_scan", "protocol", "port"],
                name="unique_port_per_scan",
            ),
        ]
        indexes = [
            models.Index(fields=["port", "protocol", "state"]),
            models.Index(fields=["state"]),
            models.Index(fields=["service_name"]),
        ]
class MTRHop(models.Model):
    """One hop row of an mtr traceroute, child of an MTR result."""

    id = models.BigAutoField(primary_key=True)
    mtr_run = models.ForeignKey(
        MTR,
        on_delete=models.CASCADE,
        related_name="hops",
    )

    # Hop index within the trace (mtr counts from 1 in its JSON output).
    hop = models.PositiveSmallIntegerField()
    # Null when the hop did not answer ("???" rows in mtr output).
    ip = models.GenericIPAddressField(null=True)

    domain = models.CharField(max_length=255, null=True)
    asn = models.PositiveIntegerField(null=True)

    class Meta:
        db_table = "network_mtrhop"
        constraints = [
            models.UniqueConstraint(
                fields=["mtr_run", "hop"],
                name="unique_hop_per_run",
            )
        ]
        indexes = [
            models.Index(fields=["ip"]),
            models.Index(fields=["asn"]),
            models.Index(fields=["domain"]),
        ]


class IPLabel(models.Model):
    """
    Stores *ground truth* about an IP at a specific time.
    Used for model training and evaluation.
    """

    id = models.BigAutoField(primary_key=True)

    # CIDR (not a single address) so one label can cover a provider range.
    ip = CIDRField()

    # When the fact was true vs. when the row was written.
    labeled_at = models.DateTimeField(default=timezone.now)
    created_at = models.DateTimeField(auto_now_add=True)

    label_kind = models.CharField(max_length=32)

    source = models.CharField(max_length=32)

    confidence = models.FloatField(default=1.0)

    provider = models.CharField(
        max_length=128,
        null=True,
        help_text="Proxy/VPN provider if known (e.g. geonode, brightdata)",
    )

    metadata = models.JSONField(null=True)

    class Meta:
        db_table = "network_iplabel"
        indexes = [
            # FIX: cidr has no *default* GiST operator class, so a plain
            # GistIndex(fields=["ip"]) fails at migration time; inet_ops is
            # required for network containment queries (>>=, <<=). Django
            # also requires an explicit name when opclasses are given.
            GistIndex(
                fields=["ip"],
                opclasses=["inet_ops"],
                name="network_iplabel_ip_gist",
            ),
            models.Index(fields=["-labeled_at"]),
            models.Index(fields=["ip", "-labeled_at"]),
            models.Index(fields=["label_kind"]),
        ]
        constraints = [
            # One label event per (network, kind, source, timestamp).
            models.UniqueConstraint(
                fields=["ip", "label_kind", "source", "labeled_at"],
                name="unique_ip_label_event",
            )
        ]
@pytest.fixture
def utc_now() -> datetime:
    """Timezone-aware "now" in UTC."""
    return datetime.now(timezone.utc)


@pytest.fixture
def utc_hour_ago() -> datetime:
    """Timezone-aware UTC timestamp exactly one hour in the past."""
    return datetime.now(timezone.utc) - timedelta(hours=1)
@pytest.fixture(scope="session")
def scan_group_id():
    """Hex UUID shared by every tool run built within this test session."""
    return uuid4().hex


@pytest.fixture(scope="session")
def iplabel_manager(thl_web_rw) -> IPLabelManager:
    """IPLabelManager bound to the throwaway unittest database."""
    # Hard guard: refuse to run against anything but a unittest-* database.
    assert "/unittest-" in thl_web_rw.dsn.path

    return IPLabelManager(pg_config=thl_web_rw)


@pytest.fixture(scope="session")
def toolrun_manager(thl_web_rw) -> ToolRunManager:
    """ToolRunManager bound to the throwaway unittest database."""
    assert "/unittest-" in thl_web_rw.dsn.path

    return ToolRunManager(pg_config=thl_web_rw)


@pytest.fixture(scope="session")
def nmap_raw_output(request) -> str:
    """Raw nmap XML captured from a real scan (data/nmaprun1.xml)."""
    xml_path = os.path.join(request.config.rootpath, "data/nmaprun1.xml")
    with open(xml_path, "r") as fh:
        return fh.read()


@pytest.fixture(scope="session")
def nmap_result(nmap_raw_output):
    """Structured NmapResult parsed from the canned XML fixture."""
    return parse_nmap_xml(nmap_raw_output)
@pytest.fixture(scope="session")
def dig_raw_output():
    """One canned PTR answer line as dig would print it for 45.33.32.156."""
    return "156.32.33.45.in-addr.arpa. 300 IN PTR scanme.nmap.org."


@pytest.fixture(scope="session")
def rdns_result(dig_raw_output):
    """Parsed form of the canned dig output."""
    return parse_rdns_output(ip="45.33.32.156", raw=dig_raw_output)


@pytest.fixture(scope="session")
def rdns_run(rdns_result, scan_group_id):
    """A complete RDNSRun wrapping the parsed dig result."""
    target_ip = "45.33.32.156"
    started = datetime.now(tz=timezone.utc)
    cmd = RDNSRunCommand(command="dig", options=RDNSRunCommandOptions(ip=target_ip))
    run = RDNSRun(
        ip=target_ip,
        scan_group_id=scan_group_id,
        tool_version="1.2.3",
        status=Status.SUCCESS,
        started_at=started,
        finished_at=started + timedelta(seconds=1),
        raw_command=cmd.to_command_str(),
        config=cmd,
        parsed=rdns_result,
    )
    return run


@pytest.fixture(scope="session")
def mtr_raw_output(request):
    """Raw mtr JSON report captured from a real trace (data/mtr_fatbeam.json)."""
    json_path = os.path.join(request.config.rootpath, "data/mtr_fatbeam.json")
    with open(json_path, "r") as fh:
        return fh.read()


@pytest.fixture(scope="session")
def mtr_result(mtr_raw_output):
    """Parsed MTR result for a TCP/443 trace of the canned report."""
    return parse_mtr_output(mtr_raw_output, port=443, protocol=IPProtocol.TCP)
config=config, + parsed=r, + facility_id=1, + source_ip="1.2.3.4", + ) diff --git a/tests/conftest.py b/tests/conftest.py index 30ed1c7..2482269 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,7 @@ pytest_plugins = [ "test_utils.managers.conftest", "test_utils.managers.contest.conftest", "test_utils.managers.ledger.conftest", + "test_utils.managers.network.conftest", "test_utils.managers.upk.conftest", # -- Models "test_utils.models.conftest", diff --git a/tests/data/mtr_fatbeam.json b/tests/data/mtr_fatbeam.json new file mode 100644 index 0000000..6e27eb1 --- /dev/null +++ b/tests/data/mtr_fatbeam.json @@ -0,0 +1,206 @@ +{ + "report": { + "mtr": { + "src": "gstupp-ThinkPad-X1-Carbon-Gen-11", + "dst": "167.150.6.80", + "tos": 0, + "tests": 10, + "psize": "64", + "bitpattern": "0x00" + }, + "hubs": [ + { + "count": 1, + "host": "_gateway (172.20.20.1)", + "ASN": "AS???", + "Loss%": 0.0, + "Snt": 10, + "Last": 2.408, + "Avg": 16.157, + "Best": 2.408, + "Wrst": 69.531, + "StDev": 20.69 + }, + { + "count": 2, + "host": "172.16.20.1", + "ASN": "AS???", + "Loss%": 0.0, + "Snt": 10, + "Last": 3.411, + "Avg": 16.906, + "Best": 2.613, + "Wrst": 90.7, + "StDev": 27.547 + }, + { + "count": 3, + "host": "192.168.1.254", + "ASN": "AS???", + "Loss%": 0.0, + "Snt": 10, + "Last": 17.012, + "Avg": 9.812, + "Best": 3.061, + "Wrst": 25.728, + "StDev": 8.908 + }, + { + "count": 4, + "host": "ipdsl-jal-ptovallarta-19-l0.uninet.net.mx (201.154.95.117)", + "ASN": "AS???", + "Loss%": 0.0, + "Snt": 10, + "Last": 6.954, + "Avg": 10.216, + "Best": 6.177, + "Wrst": 16.151, + "StDev": 3.343 + }, + { + "count": 5, + "host": "bb-la-onewilshire-29-ae32_0.uninet.net.mx (189.246.202.49)", + "ASN": "AS???", + "Loss%": 0.0, + "Snt": 10, + "Last": 52.557, + "Avg": 54.174, + "Best": 45.681, + "Wrst": 71.387, + "StDev": 8.011 + }, + { + "count": 6, + "host": "ae91.edge7.LosAngeles1.Level3.net (4.7.28.197)", + "ASN": "AS3356", + "Loss%": 0.0, + "Snt": 10, + "Last": 1079.2, + "Avg": 
875.97, + "Best": 47.78, + "Wrst": 4150.5, + "StDev": 1345.7 + }, + { + "count": 7, + "host": "???", + "ASN": "AS???", + "Loss%": 100.0, + "Snt": 10, + "Last": 0.0, + "Avg": 0.0, + "Best": 0.0, + "Wrst": 0.0, + "StDev": 0.0 + }, + { + "count": 8, + "host": "ae10.cr1.lax10.us.zip.zayo.com (64.125.28.224)", + "ASN": "AS6461", + "Loss%": 70.0, + "Snt": 10, + "Last": 1186.5, + "Avg": 2189.8, + "Best": 1186.5, + "Wrst": 3202.8, + "StDev": 1008.2 + }, + { + "count": 9, + "host": "ae16.cr1.sjc1.us.zip.zayo.com (64.125.21.171)", + "ASN": "AS6461", + "Loss%": 0.0, + "Snt": 10, + "Last": 92.819, + "Avg": 414.75, + "Best": 90.799, + "Wrst": 2140.8, + "StDev": 690.96 + }, + { + "count": 10, + "host": "ae27.cs3.sjc7.us.zip.zayo.com (64.125.18.28)", + "ASN": "AS6461", + "Loss%": 90.0, + "Snt": 10, + "Last": 5234.8, + "Avg": 5234.8, + "Best": 5234.8, + "Wrst": 5234.8, + "StDev": 0.0 + }, + { + "count": 11, + "host": "???", + "ASN": "AS???", + "Loss%": 100.0, + "Snt": 10, + "Last": 0.0, + "Avg": 0.0, + "Best": 0.0, + "Wrst": 0.0, + "StDev": 0.0 + }, + { + "count": 12, + "host": "ae8.cr1.sea1.us.zip.zayo.com (64.125.28.193)", + "ASN": "AS6461", + "Loss%": 0.0, + "Snt": 10, + "Last": 93.389, + "Avg": 1238.6, + "Best": 91.537, + "Wrst": 5223.9, + "StDev": 1644.1 + }, + { + "count": 13, + "host": "ae7.ter2.sea1.us.zip.zayo.com (64.125.19.197)", + "ASN": "AS6461", + "Loss%": 0.0, + "Snt": 10, + "Last": 91.212, + "Avg": 112.17, + "Best": 90.979, + "Wrst": 178.26, + "StDev": 30.086 + }, + { + "count": 14, + "host": "208.185.33.178.IDIA-369396-ZYO.zip.zayo.com (208.185.33.178)", + "ASN": "AS6461", + "Loss%": 0.0, + "Snt": 10, + "Last": 103.95, + "Avg": 104.46, + "Best": 90.349, + "Wrst": 136.62, + "StDev": 15.726 + }, + { + "count": 15, + "host": "168.245.215.250", + "ASN": "AS55039", + "Loss%": 0.0, + "Snt": 10, + "Last": 85.672, + "Avg": 95.289, + "Best": 84.352, + "Wrst": 156.16, + "StDev": 21.621 + }, + { + "count": 16, + "host": "???", + "ASN": "AS???", + "Loss%": 100.0, + "Snt": 10, 
+ "Last": 0.0, + "Avg": 0.0, + "Best": 0.0, + "Wrst": 0.0, + "StDev": 0.0 + } + ] + } +} diff --git a/tests/data/nmaprun1.xml b/tests/data/nmaprun1.xml new file mode 100644 index 0000000..c5fed6c --- /dev/null +++ b/tests/data/nmaprun1.xml @@ -0,0 +1,68 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE nmaprun> +<?xml-stylesheet href="file:///usr/bin/../share/nmap/nmap.xsl" type="text/xsl"?> +<!-- Nmap 7.80 scan initiated Wed Mar 4 12:59:08 2026 as: nmap -O -A -Pn -sV -p 22-1000,11000,1100,3389,61232 -oX - 108.171.53.1 --> +<nmaprun scanner="nmap" args="nmap -O -A -Pn -sV -p 22-1000,11000,1100,3389,61232 -oX - 108.171.53.1" start="1772650748" + startstr="Wed Mar 4 12:59:08 2026" version="7.80" xmloutputversion="1.04"> + <scaninfo type="syn" protocol="tcp" numservices="983" services="22-1000,1100,3389,11000,61232"/> + <scaninfo type="syn" protocol="udp" numservices="983" services="69,420"/> + <verbose level="0"/> + <debugging level="0"/> + <host starttime="1772650748" endtime="1772650901"> + <status state="up" reason="user-set" reason_ttl="0"/> + <address addr="108.171.53.1" addrtype="ipv4"/> + <hostnames> + <hostname name="108-171-53-1.aceips.com" type="PTR"/> + </hostnames> + <ports> + <extraports state="filtered" count="982"> + <extrareasons reason="no-responses" count="982"/> + </extraports> + <port protocol="tcp" portid="61232"> + <state state="open" reason="syn-ack" reason_ttl="47"/> + <service name="socks5" extrainfo="Username/password authentication required" method="probed" conf="10"/> + <script id="socks-auth-info" output="
 Username and password"> + <table> + <elem key="name">Username and password</elem> + <elem key="method">2</elem> + </table> + </script> + </port> + </ports> + <os> + <portused state="open" proto="tcp" portid="61232"/> + </os> + <uptime seconds="4063775" lastboot="Fri Jan 16 12:12:06 2026"/> + <distance value="21"/> + <tcpsequence index="263" difficulty="Good luck!" values="B67613E,2F0160C6,27FA849E,3B16FC3A,916ADEAF,18293FD7"/> + <ipidsequence class="All zeros" values="0,0,0,0,0,0"/> + <tcptssequence class="1000HZ" values="F2383963,F23839C0,F2383A24,F2383A8C,F2383AED,F2383B54"/> + <trace port="61232" proto="tcp"> + <hop ttl="1" ipaddr="192.168.86.1" rtt="3.83" host="_gateway"/> + <hop ttl="2" ipaddr="192.168.100.1" rtt="4.90"/> + <hop ttl="3" ipaddr="10.125.64.1" rtt="6.01"/> + <hop ttl="4" ipaddr="10.180.25.105" rtt="14.52"/> + <hop ttl="5" ipaddr="10.180.25.106" rtt="16.47"/> + <hop ttl="6" ipaddr="10.180.25.81" rtt="28.56"/> + <hop ttl="7" ipaddr="62.115.192.20" rtt="17.17" host="gdl-b2-link.ip.twelve99.net"/> + <hop ttl="8" ipaddr="62.115.140.131" rtt="29.56" host="mny-b4-link.ip.twelve99.net"/> + <hop ttl="9" ipaddr="62.115.124.12" rtt="27.68" host="mdc-b2-link.ip.twelve99.net"/> + <hop ttl="10" ipaddr="62.115.126.144" rtt="34.92" host="hou-b3-link.ip.twelve99.net"/> + <hop ttl="11" ipaddr="62.115.116.47" rtt="48.42" host="atl-b24-link.ip.twelve99.net"/> + <hop ttl="13" ipaddr="62.115.137.132" rtt="62.35" host="ash-bb2-link.ip.twelve99.net"/> + <hop ttl="14" ipaddr="62.115.139.34" rtt="67.51" host="nyk-bb5-link.ip.twelve99.net"/> + <hop ttl="16" ipaddr="62.115.136.13" rtt="64.75" host="ewr-b15-link.ip.twelve99.net"/> + <hop ttl="17" ipaddr="62.115.136.13" rtt="67.87" host="ewr-b15-link.ip.twelve99.net"/> + <hop ttl="19" ipaddr="75.76.132.68" rtt="72.32" host="hge0-1-0-6.aggr1.jfk.ny.rcn.net"/> + <hop ttl="20" ipaddr="207.237.73.98" rtt="68.13"/> + <hop ttl="21" ipaddr="108.171.53.1" rtt="64.76" host="108-171-53-1.aceips.com"/> + </trace> + <times srtt="54719" 
rttvar="23423" to="148411"/> + </host> + <runstats> + <finished time="1772650901" timestr="Wed Mar 4 13:01:41 2026" elapsed="153.06" + summary="Nmap done at Wed Mar 4 13:01:41 2026; 1 IP address (1 host up) scanned in 153.06 seconds" + exit="success"/> + <hosts up="1" down="0" total="1"/> + </runstats> +</nmaprun> diff --git a/tests/data/nmaprun2.xml b/tests/data/nmaprun2.xml new file mode 100644 index 0000000..932896c --- /dev/null +++ b/tests/data/nmaprun2.xml @@ -0,0 +1,118 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE nmaprun> +<?xml-stylesheet href="file:///usr/bin/../share/nmap/nmap.xsl" type="text/xsl"?> +<!-- Nmap 7.80 scan initiated Tue Mar 3 17:50:03 2026 as: nmap -T4 -A -oX - scanme.nmap.org --> +<nmaprun scanner="nmap" args="nmap -T4 -A -oX - scanme.nmap.org" start="1772581803" startstr="Tue Mar 3 17:50:03 2026" + version="7.80" xmloutputversion="1.04"> + <scaninfo type="connect" protocol="tcp" numservices="1000" + services="1,3-4,6-7,9,13,17,19-26,30,32-33,37,42-43,49,53,70,79-85,88-90,99-100,106,109-111,113,119,125,135,139,143-144,146,161,163,179,199,211-212,222,254-256,259,264,280,301,306,311,340,366,389,406-407,416-417,425,427,443-445,458,464-465,481,497,500,512-515,524,541,543-545,548,554-555,563,587,593,616-617,625,631,636,646,648,666-668,683,687,691,700,705,711,714,720,722,726,749,765,777,783,787,800-801,808,843,873,880,888,898,900-903,911-912,981,987,990,992-993,995,999-1002,1007,1009-1011,1021-1100,1102,1104-1108,1110-1114,1117,1119,1121-1124,1126,1130-1132,1137-1138,1141,1145,1147-1149,1151-1152,1154,1163-1166,1169,1174-1175,1183,1185-1187,1192,1198-1199,1201,1213,1216-1218,1233-1234,1236,1244,1247-1248,1259,1271-1272,1277,1287,1296,1300-1301,1309-1311,1322,1328,1334,1352,1417,1433-1434,1443,1455,1461,1494,1500-1501,1503,1521,1524,1533,1556,1580,1583,1594,1600,1641,1658,1666,1687-1688,1700,1717-1721,1723,1755,1761,1782-1783,1801,1805,1812,1839-1840,1862-1864,1875,1900,1914,1935,1947,1971-1972,1974,1984,1998-2010,2013,2020-2022,2030,2033
-2035,2038,2040-2043,2045-2049,2065,2068,2099-2100,2103,2105-2107,2111,2119,2121,2126,2135,2144,2160-2161,2170,2179,2190-2191,2196,2200,2222,2251,2260,2288,2301,2323,2366,2381-2383,2393-2394,2399,2401,2492,2500,2522,2525,2557,2601-2602,2604-2605,2607-2608,2638,2701-2702,2710,2717-2718,2725,2800,2809,2811,2869,2875,2909-2910,2920,2967-2968,2998,3000-3001,3003,3005-3007,3011,3013,3017,3030-3031,3052,3071,3077,3128,3168,3211,3221,3260-3261,3268-3269,3283,3300-3301,3306,3322-3325,3333,3351,3367,3369-3372,3389-3390,3404,3476,3493,3517,3527,3546,3551,3580,3659,3689-3690,3703,3737,3766,3784,3800-3801,3809,3814,3826-3828,3851,3869,3871,3878,3880,3889,3905,3914,3918,3920,3945,3971,3986,3995,3998,4000-4006,4045,4111,4125-4126,4129,4224,4242,4279,4321,4343,4443-4446,4449,4550,4567,4662,4848,4899-4900,4998,5000-5004,5009,5030,5033,5050-5051,5054,5060-5061,5080,5087,5100-5102,5120,5190,5200,5214,5221-5222,5225-5226,5269,5280,5298,5357,5405,5414,5431-5432,5440,5500,5510,5544,5550,5555,5560,5566,5631,5633,5666,5678-5679,5718,5730,5800-5802,5810-5811,5815,5822,5825,5850,5859,5862,5877,5900-5904,5906-5907,5910-5911,5915,5922,5925,5950,5952,5959-5963,5987-5989,5998-6007,6009,6025,6059,6100-6101,6106,6112,6123,6129,6156,6346,6389,6502,6510,6543,6547,6565-6567,6580,6646,6666-6669,6689,6692,6699,6779,6788-6789,6792,6839,6881,6901,6969,7000-7002,7004,7007,7019,7025,7070,7100,7103,7106,7200-7201,7402,7435,7443,7496,7512,7625,7627,7676,7741,7777-7778,7800,7911,7920-7921,7937-7938,7999-8002,8007-8011,8021-8022,8031,8042,8045,8080-8090,8093,8099-8100,8180-8181,8192-8194,8200,8222,8254,8290-8292,8300,8333,8383,8400,8402,8443,8500,8600,8649,8651-8652,8654,8701,8800,8873,8888,8899,8994,9000-9003,9009-9011,9040,9050,9071,9080-9081,9090-9091,9099-9103,9110-9111,9200,9207,9220,9290,9415,9418,9485,9500,9502-9503,9535,9575,9593-9595,9618,9666,9876-9878,9898,9900,9917,9929,9943-9944,9968,9998-10004,10009-10010,10012,10024-10025,10082,10180,10215,10243,10566,10616-10617,10621,10626,10628-10629,10778,1
1110-11111,11967,12000,12174,12265,12345,13456,13722,13782-13783,14000,14238,14441-14442,15000,15002-15004,15660,15742,16000-16001,16012,16016,16018,16080,16113,16992-16993,17877,17988,18040,18101,18988,19101,19283,19315,19350,19780,19801,19842,20000,20005,20031,20221-20222,20828,21571,22939,23502,24444,24800,25734-25735,26214,27000,27352-27353,27355-27356,27715,28201,30000,30718,30951,31038,31337,32768-32785,33354,33899,34571-34573,35500,38292,40193,40911,41511,42510,44176,44442-44443,44501,45100,48080,49152-49161,49163,49165,49167,49175-49176,49400,49999-50003,50006,50300,50389,50500,50636,50800,51103,51493,52673,52822,52848,52869,54045,54328,55055-55056,55555,55600,56737-56738,57294,57797,58080,60020,60443,61532,61900,62078,63331,64623,64680,65000,65129,65389"/> + <verbose level="0"/> + <debugging level="0"/> + <host starttime="1772581804" endtime="1772581818"> + <status state="up" reason="syn-ack" reason_ttl="0"/> + <address addr="45.33.32.156" addrtype="ipv4"/> + <hostnames> + <hostname name="scanme.nmap.org" type="user"/> + <hostname name="scanme.nmap.org" type="PTR"/> + </hostnames> + <ports> + <extraports state="closed" count="995"> + <extrareasons reason="conn-refused" count="995"/> + </extraports> + <port protocol="tcp" portid="22"> + <state state="open" reason="syn-ack" reason_ttl="0"/> + <service name="ssh" product="OpenSSH" version="6.6.1p1 Ubuntu 2ubuntu2.13" + extrainfo="Ubuntu Linux; protocol 2.0" ostype="Linux" method="probed" conf="10"> + <cpe>cpe:/a:openbsd:openssh:6.6.1p1</cpe> + <cpe>cpe:/o:linux:linux_kernel</cpe> + </service> + <script id="ssh-hostkey" + output="
 1024 ac:00:a0:1a:82:ff:cc:55:99:dc:67:2b:34:97:6b:75 (DSA)
 2048 20:3d:2d:44:62:2a:b0:5a:9d:b5:b3:05:14:c2:a6:b2 (RSA)
 256 96:02:bb:5e:57:54:1c:4e:45:2f:56:4c:4a:24:b2:57 (ECDSA)
 256 33:fa:91:0f:e0:e1:7b:1f:6d:05:a2:b0:f1:54:41:56 (ED25519)"> + <table> + <elem key="bits">1024</elem> + <elem key="type">ssh-dss</elem> + <elem key="key"> + AAAAB3NzaC1kc3MAAACBAOe8o59vFWZGaBmGPVeJBObEfi1AR8yEUYC/Ufkku3sKhGF7wM2m2ujIeZDK5vqeC0S5EN2xYo6FshCP4FQRYeTxD17nNO4PhwW65qAjDRRU0uHFfSAh5wk+vt4yQztOE++sTd1G9OBLzA8HO99qDmCAxb3zw+GQDEgPjzgyzGZ3AAAAFQCBmE1vROP8IaPkUmhM5xLFta/xHwAAAIEA3EwRfaeOPLL7TKDgGX67Lbkf9UtdlpCdC4doMjGgsznYMwWH6a7Lj3vi4/KmeZZdix6FMdFqq+2vrfT1DRqx0RS0XYdGxnkgS+2g333WYCrUkDCn6RPUWR/1TgGMPHCj7LWCa1ZwJwLWS2KX288Pa2gLOWuhZm2VYKSQx6NEDOIAAACBANxIfprSdBdbo4Ezrh6/X6HSvrhjtZ7MouStWaE714ByO5bS2coM9CyaCwYyrE5qzYiyIfb+1BG3O5nVdDuN95sQ/0bAdBKlkqLFvFqFjVbETF0ri3v97w6MpUawfF75ouDrQ4xdaUOLLEWTso6VFJcM6Jg9bDl0FA0uLZUSDEHL + </elem> + <elem key="fingerprint">ac00a01a82ffcc5599dc672b34976b75</elem> + </table> + <table> + <elem key="bits">2048</elem> + <elem key="type">ssh-rsa</elem> + <elem key="key"> + AAAAB3NzaC1yc2EAAAADAQABAAABAQC6afooTZ9mVUGFNEhkMoRR1Btzu64XXwElhCsHw/zVlIx/HXylNbb9+11dm2VgJQ21pxkWDs+L6+EbYyDnvRURTrMTgHL0xseB0EkNqexs9hYZSiqtMx4jtGNtHvsMxZnbxvVUk2dasWvtBkn8J5JagSbzWTQo4hjKMOI1SUlXtiKxAs2F8wiq2EdSuKw/KNk8GfIp1TA+8ccGeAtnsVptTJ4D/8MhAWsROkQzOowQvnBBz2/8ecEvoMScaf+kDfNQowK3gENtSSOqYw9JLOza6YJBPL/aYuQQ0nJ74Rr5vL44aNIlrGI9jJc2x0bV7BeNA5kVuXsmhyfWbbkB8yGd + </elem> + <elem key="fingerprint">203d2d44622ab05a9db5b30514c2a6b2</elem> + </table> + <table> + <elem key="bits">256</elem> + <elem key="type">ecdsa-sha2-nistp256</elem> + <elem key="key"> + AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBMD46g67x6yWNjjQJnXhiz/TskHrqQ0uPcOspFrIYW382uOGzmWDZCFV8FbFwQyH90u+j0Qr1SGNAxBZMhOQ8pc= + </elem> + <elem key="fingerprint">9602bb5e57541c4e452f564c4a24b257</elem> + </table> + <table> + <elem key="bits">256</elem> + <elem key="type">ssh-ed25519</elem> + <elem key="key">AAAAC3NzaC1lZDI1NTE5AAAAILzVjfIyIHfXyRd8jVBaVT8Yvk/UvHh5Afvho8sGciG7</elem> + <elem key="fingerprint">33fa910fe0e17b1f6d05a2b0f1544156</elem> + </table> + </script> + </port> + <port 
protocol="tcp" portid="25"> + <state state="filtered" reason="no-response" reason_ttl="0"/> + <service name="smtp" method="table" conf="3"/> + </port> + <port protocol="tcp" portid="80"> + <state state="open" reason="syn-ack" reason_ttl="0"/> + <service name="http" product="Apache httpd" version="2.4.7" extrainfo="(Ubuntu)" method="probed" + conf="10"> + <cpe>cpe:/a:apache:http_server:2.4.7</cpe> + </service> + <script id="http-server-header" output="Apache/2.4.7 (Ubuntu)"> + <elem>Apache/2.4.7 (Ubuntu)</elem> + </script> + <script id="http-title" output="Go ahead and ScanMe!"> + <elem key="title">Go ahead and ScanMe!</elem> + </script> + </port> + <port protocol="tcp" portid="9929"> + <state state="open" reason="syn-ack" reason_ttl="0"/> + <service name="nping-echo" product="Nping echo" method="probed" conf="10"/> + </port> + <port protocol="tcp" portid="31337"> + <state state="open" reason="syn-ack" reason_ttl="0"/> + <service name="tcpwrapped" method="probed" conf="8"/> + </port> + </ports> + <os> + <portused state="open" proto="tcp" portid="22"/> + <portused state="closed" proto="tcp" portid="1"/> + <portused state="closed" proto="udp" portid="31994"/> + <osmatch name="Linux 2.6.38 - 3.0" accuracy="100" line="43893"> + <osclass type="general purpose" vendor="Linux" osfamily="Linux" osgen="2.6.X" accuracy="100"> + <cpe>cpe:/o:linux:linux_kernel:2.6</cpe> + </osclass> + <osclass type="general purpose" vendor="Linux" osfamily="Linux" osgen="3.X" accuracy="100"> + <cpe>cpe:/o:linux:linux_kernel:3</cpe> + </osclass> + </osmatch> + <osfingerprint + fingerprint=" SEQ(SP=C5%GCD=1%ISR=C7%TI=Z%II=I%TS=8) ECN(R=Y%DF=Y%T=40%W=16D0%O=M5B4NNSNW2%CC=N%Q=) T1(R=Y%DF=Y%T=40%S=O%A=S+%F=AS%RD=0%Q=) T2(R=N) T3(R=Y%DF=Y%T=40%W=16A0%S=O%A=S+%F=AS%O=M5B4ST11NW2%RD=0%Q=) T4(R=Y%DF=Y%T=40%W=0%S=A%A=Z%F=R%O=%RD=0%Q=) T7(R=Y%DF=Y%T=40%W=0%S=Z%A=S+%F=AR%O=%RD=0%Q=) U1(R=Y%DF=N%T=40%TOS=C0%IPL=164%UN=0%RIPL=G%RID=G%RIPCK=G%RUCK=G%RUL=G%RUD=G) IE(R=Y%DFI=N%T=40%TOSI=S%CD=S%SI=S%DLI=S) "/> + 
</os> + <hostscript> + <script id="sql-injection" output="Possible SQL injection vulnerability detected: + URI: /index.php?id=1' + Payload: id=1' OR '1'='1 + Output: SQL error detected"/> + </hostscript> + <times srtt="90053" rttvar="8723" to="124945"/> + </host> + <runstats> + <finished time="1772581818" timestr="Tue Mar 3 17:50:18 2026" elapsed="14.44" + summary="Nmap done at Tue Mar 3 17:50:18 2026; 1 IP address (1 host up) scanned in 14.44 seconds" + exit="success"/> + <hosts up="1" down="0" total="1"/> + </runstats> +</nmaprun> diff --git a/tests/managers/network/__init__.py b/tests/managers/network/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tests/managers/network/__init__.py diff --git a/tests/managers/network/test_label.py b/tests/managers/network/test_label.py new file mode 100644 index 0000000..5b9a790 --- /dev/null +++ b/tests/managers/network/test_label.py @@ -0,0 +1,202 @@ +import ipaddress + +import faker +import pytest +from psycopg.errors import UniqueViolation +from pydantic import ValidationError + +from generalresearch.managers.network.label import IPLabelManager +from generalresearch.models.network.label import ( + IPLabel, + IPLabelKind, + IPLabelSource, + IPLabelMetadata, +) +from generalresearch.models.thl.ipinfo import normalize_ip + +fake = faker.Faker() + + +@pytest.fixture +def ip_label(utc_now) -> IPLabel: + ip = ipaddress.IPv6Network((fake.ipv6(), 64), strict=False) + return IPLabel( + label_kind=IPLabelKind.VPN, + labeled_at=utc_now, + source=IPLabelSource.INTERNAL_USE, + provider="GeoNodE", + created_at=utc_now, + ip=ip, + metadata=IPLabelMetadata(services=["RDP"]) + ) + + +def test_model(utc_now): + ip = fake.ipv4_public() + lbl = IPLabel( + label_kind=IPLabelKind.VPN, + labeled_at=utc_now, + source=IPLabelSource.INTERNAL_USE, + provider="GeoNodE", + created_at=utc_now, + ip=ip, + ) + assert lbl.ip.prefixlen == 32 + print(f"{lbl.ip=}") + + ip = ipaddress.IPv4Network((ip, 24), strict=False) + lbl = 
IPLabel( + label_kind=IPLabelKind.VPN, + labeled_at=utc_now, + source=IPLabelSource.INTERNAL_USE, + provider="GeoNodE", + created_at=utc_now, + ip=ip, + ) + print(f"{lbl.ip=}") + + with pytest.raises(ValidationError, match="IPv6 network must be /64 or larger"): + IPLabel( + label_kind=IPLabelKind.VPN, + labeled_at=utc_now, + source=IPLabelSource.INTERNAL_USE, + provider="GeoNodE", + created_at=utc_now, + ip=fake.ipv6(), + ) + + ip = ipaddress.IPv6Network((fake.ipv6(), 64), strict=False) + lbl = IPLabel( + label_kind=IPLabelKind.VPN, + labeled_at=utc_now, + source=IPLabelSource.INTERNAL_USE, + provider="GeoNodE", + created_at=utc_now, + ip=ip, + ) + print(f"{lbl.ip=}") + + ip = ipaddress.IPv6Network((ip.network_address, 48), strict=False) + lbl = IPLabel( + label_kind=IPLabelKind.VPN, + labeled_at=utc_now, + source=IPLabelSource.INTERNAL_USE, + provider="GeoNodE", + created_at=utc_now, + ip=ip, + ) + print(f"{lbl.ip=}") + + +def test_create(iplabel_manager: IPLabelManager, ip_label: IPLabel): + iplabel_manager.create(ip_label) + + with pytest.raises( + UniqueViolation, match="duplicate key value violates unique constraint" + ): + iplabel_manager.create(ip_label) + + +def test_filter(iplabel_manager: IPLabelManager, ip_label: IPLabel, utc_hour_ago): + res = iplabel_manager.filter(ips=[ip_label.ip]) + assert len(res) == 0 + + iplabel_manager.create(ip_label) + res = iplabel_manager.filter(ips=[ip_label.ip]) + assert len(res) == 1 + + out = res[0] + assert out == ip_label + + res = iplabel_manager.filter(ips=[ip_label.ip], labeled_after=utc_hour_ago) + assert len(res) == 1 + + ip_label2 = ip_label.model_copy() + ip_label2.ip = fake.ipv4_public() + iplabel_manager.create(ip_label2) + res = iplabel_manager.filter(ips=[ip_label.ip, ip_label2.ip]) + assert len(res) == 2 + + +def test_filter_network( + iplabel_manager: IPLabelManager, ip_label: IPLabel, utc_hour_ago +): + print(ip_label) + ip_label = ip_label.model_copy() + ip_label.ip = ipaddress.IPv6Network((fake.ipv6(), 
64), strict=False) + + iplabel_manager.create(ip_label) + res = iplabel_manager.filter(ips=[ip_label.ip]) + assert len(res) == 1 + + out = res[0] + assert out == ip_label + + res = iplabel_manager.filter(ips=[ip_label.ip], labeled_after=utc_hour_ago) + assert len(res) == 1 + + ip_label2 = ip_label.model_copy() + ip_label2.ip = fake.ipv4_public() + iplabel_manager.create(ip_label2) + res = iplabel_manager.filter(ips=[ip_label.ip, ip_label2.ip]) + assert len(res) == 2 + + +def test_network(iplabel_manager: IPLabelManager, utc_now): + # This is a fully-specific /128 ipv6 address. + # e.g. '51b7:b38d:8717:6c5b:cd3e:f5c3:3aba:17d' + ip = fake.ipv6() + # Generally, we'd want to annotate the /64 network + # e.g. '51b7:b38d:8717:6c5b::/64' + ip_64 = ipaddress.IPv6Network((ip, 64), strict=False) + + label = IPLabel( + label_kind=IPLabelKind.VPN, + labeled_at=utc_now, + source=IPLabelSource.INTERNAL_USE, + provider="GeoNodE", + created_at=utc_now, + ip=ip_64, + ) + iplabel_manager.create(label) + + # If I query for the /128 directly, I won't find it + res = iplabel_manager.filter(ips=[ip]) + assert len(res) == 0 + + # If I query for the /64 network I will + res = iplabel_manager.filter(ips=[ip_64]) + assert len(res) == 1 + + # Or, I can query for the /128 ip IN a network + res = iplabel_manager.filter(ip_in_network=ip) + assert len(res) == 1 + + +def test_label_cidr_and_ipinfo( + iplabel_manager: IPLabelManager, ip_information_factory, ip_geoname, utc_now +): + # We have network_iplabel.ip as a cidr col and + # thl_ipinformation.ip as a inet col. 
Make sure we can join appropriately + ip = fake.ipv6() + ip_information_factory(ip=ip, geoname=ip_geoname) + # We normalize for storage into ipinfo table + ip_norm, prefix = normalize_ip(ip) + + # Test with a larger network + ip_48 = ipaddress.IPv6Network((ip, 48), strict=False) + print(f"{ip=}") + print(f"{ip_norm=}") + print(f"{ip_48=}") + label = IPLabel( + label_kind=IPLabelKind.VPN, + labeled_at=utc_now, + source=IPLabelSource.INTERNAL_USE, + provider="GeoNodE", + created_at=utc_now, + ip=ip_48, + ) + iplabel_manager.create(label) + + res = iplabel_manager.test_join(ip_norm) + print(res) diff --git a/tests/managers/network/test_tool_run.py b/tests/managers/network/test_tool_run.py new file mode 100644 index 0000000..a815809 --- /dev/null +++ b/tests/managers/network/test_tool_run.py @@ -0,0 +1,25 @@ +def test_create_tool_run_from_nmap_run(nmap_run, toolrun_manager): + + toolrun_manager.create_nmap_run(nmap_run) + + run_out = toolrun_manager.get_nmap_run(nmap_run.id) + + assert nmap_run == run_out + + +def test_create_tool_run_from_rdns_run(rdns_run, toolrun_manager): + + toolrun_manager.create_rdns_run(rdns_run) + + run_out = toolrun_manager.get_rdns_run(rdns_run.id) + + assert rdns_run == run_out + + +def test_create_tool_run_from_mtr_run(mtr_run, toolrun_manager): + + toolrun_manager.create_mtr_run(mtr_run) + + run_out = toolrun_manager.get_mtr_run(mtr_run.id) + + assert mtr_run == run_out diff --git a/tests/models/network/__init__.py b/tests/models/network/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tests/models/network/__init__.py diff --git a/tests/models/network/test_mtr.py b/tests/models/network/test_mtr.py new file mode 100644 index 0000000..2965300 --- /dev/null +++ b/tests/models/network/test_mtr.py @@ -0,0 +1,26 @@ +from generalresearch.models.network.mtr.execute import execute_mtr +import faker + +from generalresearch.models.network.tool_run import ToolName, ToolClass + +fake = faker.Faker() + + +def 
test_execute_mtr(toolrun_manager): + ip = "65.19.129.53" + + run = execute_mtr(ip=ip, report_cycles=3) + assert run.tool_name == ToolName.MTR + assert run.tool_class == ToolClass.TRACEROUTE + assert run.ip == ip + result = run.parsed + + last_hop = result.hops[-1] + assert last_hop.asn == 6939 + assert last_hop.domain == "grlengine.com" + + last_hop_1 = result.hops[-2] + assert last_hop_1.asn == 6939 + assert last_hop_1.domain == "he.net" + + toolrun_manager.create_mtr_run(run) diff --git a/tests/models/network/test_nmap.py b/tests/models/network/test_nmap.py new file mode 100644 index 0000000..0be98d4 --- /dev/null +++ b/tests/models/network/test_nmap.py @@ -0,0 +1,29 @@ +import subprocess + +from generalresearch.models.network.definitions import IPProtocol +from generalresearch.models.network.nmap.execute import execute_nmap +import faker + +from generalresearch.models.network.nmap.result import PortState +from generalresearch.models.network.tool_run import ToolName, ToolClass + +fake = faker.Faker() + + +def resolve(host): + return subprocess.check_output(["dig", host, "+short"]).decode().strip() + + +def test_execute_nmap_scanme(toolrun_manager): + ip = resolve("scanme.nmap.org") + + run = execute_nmap(ip=ip, top_ports=None, ports="20-30", enable_advanced=False) + assert run.tool_name == ToolName.NMAP + assert run.tool_class == ToolClass.PORT_SCAN + assert run.ip == ip + result = run.parsed + + port22 = result._port_index[(IPProtocol.TCP, 22)] + assert port22.state == PortState.OPEN + + toolrun_manager.create_nmap_run(run) diff --git a/tests/models/network/test_nmap_parser.py b/tests/models/network/test_nmap_parser.py new file mode 100644 index 0000000..96d7b37 --- /dev/null +++ b/tests/models/network/test_nmap_parser.py @@ -0,0 +1,22 @@ +import os + +import pytest + +from generalresearch.models.network.nmap.parser import parse_nmap_xml + +@pytest.fixture +def nmap_raw_output_2(request) -> str: + fp = os.path.join(request.config.rootpath, "data/nmaprun2.xml") + 
with open(fp, "r") as f: + data = f.read() + return data + + +def test_nmap_xml_parser(nmap_raw_output, nmap_raw_output_2): + n = parse_nmap_xml(nmap_raw_output) + assert n.tcp_open_ports == [61232] + assert len(n.trace.hops) == 18 + + n = parse_nmap_xml(nmap_raw_output_2) + assert n.tcp_open_ports == [22, 80, 9929, 31337] + assert n.trace is None diff --git a/tests/models/network/test_rdns.py b/tests/models/network/test_rdns.py new file mode 100644 index 0000000..e56c494 --- /dev/null +++ b/tests/models/network/test_rdns.py @@ -0,0 +1,33 @@ +from generalresearch.models.network.rdns.execute import execute_rdns +import faker + +from generalresearch.models.network.tool_run import ToolName, ToolClass + +fake = faker.Faker() + + +def test_execute_rdns_grl(toolrun_manager): + ip = "65.19.129.53" + run = execute_rdns(ip=ip) + assert run.tool_name == ToolName.DIG + assert run.tool_class == ToolClass.RDNS + assert run.ip == ip + result = run.parsed + assert result.primary_hostname == "in1-smtp.grlengine.com" + assert result.primary_domain == "grlengine.com" + assert result.hostname_count == 1 + + toolrun_manager.create_rdns_run(run) + + +def test_execute_rdns_none(toolrun_manager): + ip = fake.ipv6() + run = execute_rdns(ip) + result = run.parsed + + assert result.primary_hostname is None + assert result.primary_domain is None + assert result.hostname_count == 0 + assert result.hostnames == [] + + toolrun_manager.create_rdns_run(run) |
