Skip to content

Models API

Data models for the DIG processing pipeline.

AuditTrail dataclass

Immutable processing history.

Each step is appended (never removed). The trail can be serialized to JSON for reproducibility.

Source code in dig/models/audit.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
@dataclass
class AuditTrail:
    """Append-only processing history.

    Steps are added with :meth:`add_step`; this class offers no way to
    remove them. The trail serializes to a JSON array (one object per
    step, oldest first) for reproducibility.

    Attributes:
        steps: Recorded processing steps in the order they were added.
    """

    steps: list[ProcessingStep] = field(default_factory=list)

    def add_step(self, name: str, parameters: dict | None = None) -> ProcessingStep:
        """Append a processing step to the trail.

        Args:
            name: Name of the processing step.
            parameters: Parameters the step was run with. ``None`` or an
                empty dict is recorded as ``{}``.

        Returns:
            The newly created step (also the last element of ``steps``).
        """
        # The previous step is referenced by its ISO-8601 timestamp.
        parent = self.steps[-1].timestamp.isoformat() if self.steps else None
        step = ProcessingStep(
            name=name,
            # Shallow-copy so that a caller mutating its dict afterwards
            # cannot silently rewrite what the trail recorded.
            parameters=dict(parameters) if parameters else {},
            parent_step=parent,
        )
        self.steps.append(step)
        return step

    def to_json(self) -> str:
        """Serialize the audit trail to a JSON array string (2-space indent)."""
        return json.dumps(
            [s.to_dict() for s in self.steps],
            indent=2,
        )

    def save(self, path: str) -> None:
        """Save the audit trail to a JSON file.

        Args:
            path: Destination file; it is overwritten if it exists.

        Raises:
            TypeError: Propagated from :meth:`to_json` when a step holds a
                value JSON cannot encode. The destination is left untouched
                in that case.
        """
        # Serialize first: opening with "w" truncates immediately, so doing
        # it the other way round would wipe an existing file on failure.
        payload = self.to_json()
        with open(path, "w", encoding="utf-8") as f:
            f.write(payload)

    @classmethod
    def from_json(cls, path: str) -> "AuditTrail":
        """Load an audit trail from a JSON file.

        Args:
            path: File containing a JSON array of step objects, as written
                by :meth:`save`.

        Returns:
            A new trail with one step per array element, in file order.

        Raises:
            KeyError: If a step object lacks ``"name"`` or ``"timestamp"``.
        """
        # Decode as UTF-8 explicitly rather than relying on the locale.
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
        trail = cls()
        for item in data:
            step = ProcessingStep(
                name=item["name"],
                parameters=item.get("parameters", {}),
                timestamp=datetime.fromisoformat(item["timestamp"]),
                # Files without a recorded version are marked "unknown"
                # instead of inheriting the ProcessingStep default.
                software_version=item.get("software_version", "unknown"),
                parent_step=item.get("parent_step"),
            )
            trail.steps.append(step)
        return trail

    def __len__(self) -> int:
        """Return the number of recorded steps."""
        return len(self.steps)

add_step(name, parameters=None)

Append a processing step to the trail.

Source code in dig/models/audit.py
44
45
46
47
48
49
50
51
52
53
def add_step(self, name: str, parameters: dict | None = None) -> ProcessingStep:
    """Append a processing step to the trail.

    Args:
        name: Name of the processing step.
        parameters: Parameters the step was run with. ``None`` or an empty
            dict is recorded as ``{}``.

    Returns:
        The newly created step (also the last element of ``self.steps``).
    """
    # The previous step is referenced by its ISO-8601 timestamp.
    parent = self.steps[-1].timestamp.isoformat() if self.steps else None
    step = ProcessingStep(
        name=name,
        # Shallow-copy so that a caller mutating its dict afterwards cannot
        # silently rewrite what the trail recorded.
        parameters=dict(parameters) if parameters else {},
        parent_step=parent,
    )
    self.steps.append(step)
    return step

from_json(path) classmethod

Load an audit trail from a JSON file.

Source code in dig/models/audit.py
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
@classmethod
def from_json(cls, path: str) -> "AuditTrail":
    """Load an audit trail from a JSON file.

    Args:
        path: File containing a JSON array of step objects.

    Returns:
        A new trail with one step per array element, in file order.

    Raises:
        KeyError: If a step object lacks ``"name"`` or ``"timestamp"``.
    """
    # Decode as UTF-8 explicitly; the locale default would mis-read
    # non-ASCII content on platforms whose default is not UTF-8.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    trail = cls()
    for item in data:
        step = ProcessingStep(
            name=item["name"],
            parameters=item.get("parameters", {}),
            timestamp=datetime.fromisoformat(item["timestamp"]),
            # Files without a recorded version are marked "unknown".
            software_version=item.get("software_version", "unknown"),
            parent_step=item.get("parent_step"),
        )
        trail.steps.append(step)
    return trail

save(path)

Save the audit trail to a JSON file.

Source code in dig/models/audit.py
62
63
64
65
def save(self, path: str) -> None:
    """Save the audit trail to a JSON file.

    Args:
        path: Destination file; it is overwritten if it exists.

    Raises:
        Exception: Whatever ``self.to_json()`` raises is propagated, and
            the destination file is left untouched in that case.
    """
    # Serialize first: opening with "w" truncates immediately, so doing it
    # the other way round would wipe an existing file if serialization fails.
    payload = self.to_json()
    with open(path, "w", encoding="utf-8") as f:
        f.write(payload)

to_json()

Serialize the audit trail to JSON.

Source code in dig/models/audit.py
55
56
57
58
59
60
def to_json(self) -> str:
    """Return the trail as a JSON array string, one object per step.

    Each step contributes the result of its ``to_dict()``; output is
    indented by two spaces.
    """
    records = [step.to_dict() for step in self.steps]
    return json.dumps(records, indent=2)

Grid3D dataclass

A 3D GPR volume assembled from multiple parallel profiles.

Dimensions: (inline, crossline, depth/time)

Source code in dig/models/grid.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
@dataclass
class Grid3D:
    """3D GPR volume built from multiple parallel profiles.

    Axis order of ``data`` is (inline, crossline, depth/time). The section
    helpers use NumPy basic indexing, so for an ``ndarray`` they return
    views onto ``data`` rather than copies.
    """

    # Volume samples, indexed (inline, crossline, depth).
    data: np.ndarray
    # Sampling along each axis.
    inline_spacing_m: float = 0.05
    crossline_spacing_m: float = 0.5
    sample_interval_ns: float = 0.1

    # Placement of the grid in map coordinates.
    origin_easting: float = 0.0
    origin_northing: float = 0.0
    rotation_deg: float = 0.0

    @property
    def shape(self) -> tuple[int, int, int]:
        """Shape of ``data``."""
        return self.data.shape

    @property
    def n_inline(self) -> int:
        """Length of axis 0 (inline)."""
        return self.shape[0]

    @property
    def n_crossline(self) -> int:
        """Length of axis 1 (crossline)."""
        return self.shape[1]

    @property
    def n_depth(self) -> int:
        """Length of axis 2 (depth/time)."""
        return self.shape[2]

    def time_slice(self, depth_index: int) -> np.ndarray:
        """Return the (inline, crossline) plane at ``depth_index``."""
        plane = np.s_[:, :, depth_index]
        return self.data[plane]

    def inline_section(self, inline_index: int) -> np.ndarray:
        """Return the (crossline, depth) section at ``inline_index``."""
        section = np.s_[inline_index, :, :]
        return self.data[section]

    def crossline_section(self, crossline_index: int) -> np.ndarray:
        """Return the (inline, depth) section at ``crossline_index``."""
        section = np.s_[:, crossline_index, :]
        return self.data[section]

crossline_section(crossline_index)

Extract a single crossline profile.

Source code in dig/models/grid.py
49
50
51
def crossline_section(self, crossline_index: int) -> np.ndarray:
    """Return the section of ``self.data`` at ``crossline_index`` on axis 1.

    All of axes 0 and 2 are kept.
    """
    section = np.s_[:, crossline_index, :]
    return self.data[section]

inline_section(inline_index)

Extract a single inline profile.

Source code in dig/models/grid.py
45
46
47
def inline_section(self, inline_index: int) -> np.ndarray:
    """Return the section of ``self.data`` at ``inline_index`` on axis 0.

    All of axes 1 and 2 are kept.
    """
    section = np.s_[inline_index, :, :]
    return self.data[section]

time_slice(depth_index)

Extract a single time/depth slice.

Source code in dig/models/grid.py
41
42
43
def time_slice(self, depth_index: int) -> np.ndarray:
    """Return the plane of ``self.data`` at ``depth_index`` on axis 2.

    All of axes 0 and 1 are kept.
    """
    plane = np.s_[:, :, depth_index]
    return self.data[plane]

MagnetometryGrid dataclass

A 2D magnetometry grid.

Represents gridded magnetic gradient data with spatial metadata.

Source code in dig/models/magnetometry_grid.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
@dataclass
class MagnetometryGrid:
    """2D grid of magnetometry readings plus its spatial metadata.

    ``data`` is indexed (rows, cols); every cell is a square of side
    ``cell_size_m``.
    """

    # Gridded values, indexed (rows, cols).
    data: np.ndarray
    cell_size_m: float = 0.5
    origin_easting: float = 0.0
    origin_northing: float = 0.0
    rotation_deg: float = 0.0

    @property
    def shape(self) -> tuple[int, int]:
        """Shape of ``data``."""
        return self.data.shape

    @property
    def n_rows(self) -> int:
        """Length of axis 0."""
        return self.shape[0]

    @property
    def n_cols(self) -> int:
        """Length of axis 1."""
        return self.shape[1]

    @property
    def extent_m(self) -> tuple[float, float, float, float]:
        """Bounding box in metres as (west, east, south, north).

        The box starts at the origin and spans ``n_cols`` cells eastwards
        and ``n_rows`` cells northwards; ``rotation_deg`` is not applied.
        """
        cell = self.cell_size_m
        west = self.origin_easting
        east = west + self.n_cols * cell
        south = self.origin_northing
        north = south + self.n_rows * cell
        return west, east, south, north

extent_m property

Bounding box in metres (west, east, south, north).

ProcessingStep dataclass

A single processing step in the audit trail.

Source code in dig/models/audit.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
@dataclass
class ProcessingStep:
    """One entry in a processing audit trail."""

    name: str
    parameters: dict
    # Creation time; defaults to the current time in UTC (timezone-aware).
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    software_version: str = "0.1.0"
    # Reference to the preceding step, or None for the first step.
    parent_step: Optional[str] = None

    def to_dict(self) -> dict:
        """Return the step as a plain dict, with the timestamp as ISO-8601 text.

        ``parameters`` is included as-is (the same object, not a copy).
        """
        record: dict = {"name": self.name, "parameters": self.parameters}
        record["timestamp"] = self.timestamp.isoformat()
        record["software_version"] = self.software_version
        record["parent_step"] = self.parent_step
        return record

Profile dataclass

A single 2D GPR profile (radargram).

Represents one survey line with trace data and spatial metadata.

Source code in dig/models/profile.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
@dataclass
class Profile:
    """One 2D GPR profile (radargram) for a single survey line.

    ``data`` is indexed (traces, samples); the remaining fields carry the
    sampling and position metadata for the line.
    """

    name: str
    # Trace samples, indexed (traces, samples).
    data: np.ndarray
    trace_spacing_m: float = 0.05
    sample_interval_ns: float = 0.1
    start_position_m: float = 0.0
    elevation_m: float = 0.0

    @property
    def num_traces(self) -> int:
        """Length of axis 0 of ``data``."""
        trace_axis = 0
        return self.data.shape[trace_axis]

    @property
    def num_samples(self) -> int:
        """Length of axis 1 of ``data``."""
        sample_axis = 1
        return self.data.shape[sample_axis]

    @property
    def time_window_ns(self) -> float:
        """Sample count multiplied by the sample interval, in nanoseconds."""
        samples = self.num_samples
        return samples * self.sample_interval_ns

    @property
    def length_m(self) -> float:
        """Trace count multiplied by the trace spacing, in metres."""
        traces = self.num_traces
        return traces * self.trace_spacing_m

Survey dataclass

A geophysical survey — the central data object.

Holds memory-mapped trace data, header metadata, coordinate system, and an immutable processing history DAG.

Source code in dig/models/survey.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
@dataclass
class Survey:
    """A geophysical survey — the central data object.

    Bundles the source file reference, header metadata, spatial reference,
    the processing audit trail and (optionally) the trace data array.
    """

    path: Path
    format: str  # "dzt", "dt1", "magnetometry", "segy"

    # --- Data dimensions ---
    num_traces: int = 0
    samples_per_trace: int = 0
    bits_per_sample: int = 16

    # --- Time/depth parameters ---
    sample_interval_ns: float = 0.0
    time_window_ns: float = 0.0
    time_zero_ns: float = 0.0

    # --- Survey parameters ---
    traces_per_second: float = 0.0
    traces_per_meter: float = 0.0
    channels: int = 1

    # --- Spatial reference ---
    crs_epsg: Optional[int] = None
    origin_easting: float = 0.0
    origin_northing: float = 0.0
    rotation_deg: float = 0.0
    pixel_size_m: float = 0.0

    # --- Processing history; each Survey gets its own fresh trail ---
    audit: AuditTrail = field(default_factory=AuditTrail)

    # --- Trace data; None until supplied, and left out of the repr ---
    _data: Optional[np.ndarray] = field(default=None, repr=False)

    @property
    def data(self) -> Optional[np.ndarray]:
        """Trace data array, or ``None`` when no data has been attached.

        ``load()`` returns the same array but raises instead of returning
        ``None``.
        """
        return self._data

    def load(self) -> np.ndarray:
        """Return the attached trace data, raising if there is none.

        This method does not read anything itself; the error message points
        callers to the parsers (DZTFile, DT1File, etc.) for creating a
        Survey that carries data.

        Raises:
            RuntimeError: If no data array has been attached.
        """
        array = self._data
        if array is not None:
            return array
        raise RuntimeError(
            "Data not loaded. Use a parser (DZTFile, DT1File, etc.) "
            "to create a Survey with data."
        )

    @property
    def shape(self) -> tuple[int, int]:
        """Header-declared shape: (num_traces, samples_per_trace)."""
        return self.num_traces, self.samples_per_trace

    def __repr__(self) -> str:
        """Compact summary: format, trace/sample counts and step count."""
        summary = (
            f"format={self.format}, "
            f"traces={self.num_traces}, "
            f"samples={self.samples_per_trace}, "
            f"steps={len(self.audit.steps)}"
        )
        return f"Survey({summary})"

data property

Access the trace data array.

Returns None if no data has been attached. load() returns the same array but raises RuntimeError instead of returning None.

shape property

Data shape: (num_traces, samples_per_trace).

load()

Return the trace data for processing/visualization, raising RuntimeError if no data has been attached (this method does not itself read from disk).

For large surveys, prefer memory-mapped access via parsers.

Source code in dig/models/survey.py
59
60
61
62
63
64
65
66
67
68
69
def load(self) -> np.ndarray:
    """Return the attached trace data, raising if there is none.

    This method does not read anything itself; the error message points
    callers to the parsers (DZTFile, DT1File, etc.) for creating a Survey
    that carries data.

    Raises:
        RuntimeError: If ``self._data`` is ``None``.
    """
    array = self._data
    if array is not None:
        return array
    raise RuntimeError(
        "Data not loaded. Use a parser (DZTFile, DT1File, etc.) "
        "to create a Survey with data."
    )