aboutsummaryrefslogtreecommitdiff
path: root/generalresearch/utils/aggregation.py
blob: b168e4c58d0b1071b1e09458a6a59d1f4fd32433 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
from collections import defaultdict
from typing import Dict, List


def group_by_year(records: List[Dict], datetime_field: str) -> Dict[int, List[Dict]]:
    """Group records by the calendar year found in one of their fields.

    Records are consumed in a single pass and are stored by reference (not
    copied); within each year they keep their input order.

    Args:
        records: Mappings to group. Each must contain ``datetime_field``.
        datetime_field: Key whose value carries the timestamp. The value may
            be a string whose first four characters are the year (e.g. an
            ISO 8601 timestamp such as ``"2021-05-01T10:00:00"``) or any
            object exposing a ``year`` attribute, such as
            ``datetime.datetime`` or ``datetime.date``.

    Returns:
        A plain ``dict`` mapping each year to the list of records that fall
        in it. Empty input yields an empty dict.

    Raises:
        KeyError: If a record lacks ``datetime_field``.
        ValueError: If a string value does not start with a parseable year.
        TypeError: If a value has no ``year`` attribute and cannot be sliced.
    """
    by_year: Dict[int, List[Dict]] = defaultdict(list)

    for record in records:
        stamp = record[datetime_field]
        if hasattr(stamp, "year"):
            # datetime/date-like values already know their year.
            year = stamp.year
        else:
            # Read the year off the string prefix instead of paying for a
            # full datetime parse of every record.
            year = int(stamp[:4])
        by_year[year].append(record)

    # Hand back a regular dict so lookups of absent years raise KeyError
    # instead of silently inserting empty lists.
    return dict(by_year)