Skip to content

DGGRIDPandas module

DGGRIDPandas module for DGGRID cell operations on pandas DataFrames and GeoDataFrames.

DGGRIDPandas

Source code in vgridpandas/dggridpandas.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
@pd.api.extensions.register_dataframe_accessor("dggrid")
class DGGRIDPandas:
    """DataFrame accessor, registered under the name ``dggrid``, for DGGRID cells.

    Offers three operations on the wrapped frame: assigning DGGRID ids to
    rows (`latlon2dggrid`), attaching cell geometries for existing ids
    (`dggrid2geo`) and binning rows per cell (`dggridbin`).
    """

    def __init__(self, df: DataFrame):
        # The frame the accessor was created from; never modified in place here.
        self._df = df

    def latlon2dggrid(
        self,
        dggrid_instance,
        dggs_type: str,
        resolution: int,
        lat_col: str = "lat",
        lon_col: str = "lon",
        set_index: bool = False,
        address_type: str = "SEQNUM",
    ) -> AnyDataFrame:
        """Assign a DGGRID id to every row of the (Geo)DataFrame.

        gpd.GeoDataFrame: coordinates are read from ``geometry.x`` / ``geometry.y``
        any other frame: coordinates are read from `lon_col` and `lat_col`

        Coordinates are assumed to be in epsg=4326.

        Parameters
        ----------
        dggrid_instance : DGGRIDv7
            DGGRID instance, forwarded to ``latlon_to_dggrid``
        dggs_type : str
            dggrid type; its lower-cased form is used in the new column names
        resolution : int
            dggrid resolution
        lat_col : str
            Name of the latitude column (if used), default 'lat'
        lon_col : str
            Name of the longitude column (if used), default 'lon'
        set_index : bool
            If True, the column with the dggrid ids is set as index, default False
        address_type : str
            Address type, default 'SEQNUM'

        Returns
        -------
        (Geo)DataFrame
            New frame with the columns ``dggrid_<dggs_type lower-cased>`` (ids)
            and ``dggrid_<dggs_type lower-cased>_res`` (the resolution) added.
        """
        frame = self._df
        if isinstance(frame, gpd.GeoDataFrame):
            points = frame.geometry
            lons, lats = points.x, points.y
        else:
            lons, lats = frame[lon_col], frame[lat_col]

        def encode(lat, lon):
            # One id lookup per coordinate pair.
            return latlon_to_dggrid(
                dggrid_instance, dggs_type, lat, lon, resolution, address_type
            )

        cell_ids = list(map(encode, lats, lons))

        dggrid_col = f"dggrid_{dggs_type.lower()}"
        tagged = frame.assign(
            **{dggrid_col: cell_ids, f"{dggrid_col}_res": resolution}
        )
        return tagged.set_index(dggrid_col) if set_index else tagged

    def dggrid2geo(
        self,
        dggrid_instance,
        dggs_type: str,
        resolution: int,
        dggrid_col: str = None,
        address_type: str = "SEQNUM",
    ) -> GeoDataFrame:
        """Attach the DGGRID cell geometry for the ids stored in a column.

        Parameters
        ----------
        dggrid_instance : DGGRIDv7
            DGGRID instance, forwarded to ``dggrid_to_geo``
        dggs_type : str
            DGGRID type
        resolution : int
            DGGRID resolution
        dggrid_col : str, optional
            Name of the column containing DGGRID ids. Defaults to
            ``dggrid_`` followed by the lower-cased `dggs_type`.
        address_type : str
            Address type, default 'SEQNUM'

        Returns
        -------
        GeoDataFrame
            Whatever ``dggs_ids_to_geodataframe`` builds from the frame, the id
            column and the per-id geometry lookup. An id for which
            ``dggrid_to_geo`` yields ``None`` or an empty result is mapped to an
            empty ``Polygon``.

        Raises
        ------
        ValueError
            When the id column is not present in the DataFrame
        """
        if dggrid_col is None:
            dggrid_col = f"dggrid_{dggs_type.lower()}"

        frame = self._df
        if dggrid_col not in frame.columns:
            raise ValueError(f"Column '{dggrid_col}' not found in DataFrame")

        def cell_geometry(cell_id):
            cells = dggrid_to_geo(
                dggrid_instance, dggs_type, cell_id, resolution, address_type
            )
            # Fall back to an empty polygon when nothing usable came back.
            if cells is None or not len(cells):
                return Polygon()
            return cells.geometry.iloc[0]

        return dggs_ids_to_geodataframe(frame, frame[dggrid_col], cell_geometry)

    def dggridbin(
        self,
        dggrid_instance,
        dggs_type: str,
        resolution: int,
        stats: str = "count",
        numeric_col: str = None,
        category_col: str = None,
        lat_col: str = "lat",
        lon_col: str = "lon",
        address_type: str = "SEQNUM",
    ) -> GeoDataFrame:
        """Bin points into DGGRID cells and compute statistics.

        Runs three steps: tag each row with its cell id (`latlon2dggrid`),
        aggregate per id with ``aggregate_bin``, then attach the cell
        geometries through the ``dggrid`` accessor of the aggregated frame.

        Parameters
        ----------
        dggrid_instance : DGGRIDv7
            DGGRID instance
        dggs_type : str
            DGGRID type
        resolution : int
            DGGRID resolution
        stats : str
            Forwarded to ``aggregate_bin``, default 'count'
        numeric_col : str, optional
            Forwarded to ``aggregate_bin``
        category_col : str, optional
            Forwarded to ``aggregate_bin``
        lat_col : str
            Name of the latitude column (if used), default 'lat'
        lon_col : str
            Name of the longitude column (if used), default 'lon'
        address_type : str
            Address type, default 'SEQNUM'

        Returns
        -------
        GeoDataFrame with the aggregated values and DGGRID geometry
        """
        dggrid_col = f"dggrid_{dggs_type.lower()}"
        tagged = self.latlon2dggrid(
            dggrid_instance,
            dggs_type,
            resolution,
            lat_col=lat_col,
            lon_col=lon_col,
            address_type=address_type,
        )
        binned = aggregate_bin(tagged, dggrid_col, stats, numeric_col, category_col)
        accessor = binned.dggrid
        return accessor.dggrid2geo(
            dggrid_instance,
            dggs_type,
            resolution,
            dggrid_col=dggrid_col,
            address_type=address_type,
        )

dggrid2geo(dggrid_instance, dggs_type, resolution, dggrid_col=None, address_type='SEQNUM')

Add geometry with DGGRID geometry to the DataFrame. Assumes DGGRID id.

Parameters

dggrid_instance : DGGRIDv7 DGGRID instance dggs_type : str DGGRID type resolution : int DGGRID resolution dggrid_col : str, optional Name of the column containing DGGRID ids. Defaults to dggrid_ followed by the lower-cased dggs_type. address_type : str Address type, default 'SEQNUM'

Returns

GeoDataFrame with DGGRID geometry

Raises

ValueError When the column containing the DGGRID ids is not found in the DataFrame

Source code in vgridpandas/dggridpandas.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
def dggrid2geo(
    self,
    dggrid_instance,
    dggs_type: str,
    resolution: int,
    dggrid_col: str = None,
    address_type: str = "SEQNUM",
) -> GeoDataFrame:
    """Attach the DGGRID cell geometry for the ids stored in a column.

    Parameters
    ----------
    dggrid_instance : DGGRIDv7
        DGGRID instance, forwarded to ``dggrid_to_geo``
    dggs_type : str
        DGGRID type
    resolution : int
        DGGRID resolution
    dggrid_col : str, optional
        Name of the column containing DGGRID ids. Defaults to
        ``dggrid_`` followed by the lower-cased `dggs_type`.
    address_type : str
        Address type, default 'SEQNUM'

    Returns
    -------
    GeoDataFrame
        Whatever ``dggs_ids_to_geodataframe`` builds from the frame, the id
        column and the per-id geometry lookup. An id for which
        ``dggrid_to_geo`` yields ``None`` or an empty result is mapped to an
        empty ``Polygon``.

    Raises
    ------
    ValueError
        When the id column is not present in the DataFrame
    """
    if dggrid_col is None:
        dggrid_col = f"dggrid_{dggs_type.lower()}"

    frame = self._df
    if dggrid_col not in frame.columns:
        raise ValueError(f"Column '{dggrid_col}' not found in DataFrame")

    def cell_geometry(cell_id):
        cells = dggrid_to_geo(
            dggrid_instance, dggs_type, cell_id, resolution, address_type
        )
        # Fall back to an empty polygon when nothing usable came back.
        if cells is None or not len(cells):
            return Polygon()
        return cells.geometry.iloc[0]

    return dggs_ids_to_geodataframe(frame, frame[dggrid_col], cell_geometry)

dggridbin(dggrid_instance, dggs_type, resolution, stats='count', numeric_col=None, category_col=None, lat_col='lat', lon_col='lon', address_type='SEQNUM')

Bin points into DGGRID cells and compute statistics.

Source code in vgridpandas/dggridpandas.py
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
def dggridbin(
    self,
    dggrid_instance,
    dggs_type: str,
    resolution: int,
    stats: str = "count",
    numeric_col: str = None,
    category_col: str = None,
    lat_col: str = "lat",
    lon_col: str = "lon",
    address_type: str = "SEQNUM",
) -> GeoDataFrame:
    """Bin points into DGGRID cells and compute statistics.

    Runs three steps: tag each row with its cell id (`latlon2dggrid`),
    aggregate per id with ``aggregate_bin``, then attach the cell
    geometries through the ``dggrid`` accessor of the aggregated frame.

    Parameters
    ----------
    dggrid_instance : DGGRIDv7
        DGGRID instance
    dggs_type : str
        DGGRID type
    resolution : int
        DGGRID resolution
    stats : str
        Forwarded to ``aggregate_bin``, default 'count'
    numeric_col : str, optional
        Forwarded to ``aggregate_bin``
    category_col : str, optional
        Forwarded to ``aggregate_bin``
    lat_col : str
        Name of the latitude column (if used), default 'lat'
    lon_col : str
        Name of the longitude column (if used), default 'lon'
    address_type : str
        Address type, default 'SEQNUM'

    Returns
    -------
    GeoDataFrame with the aggregated values and DGGRID geometry
    """
    dggrid_col = f"dggrid_{dggs_type.lower()}"
    tagged = self.latlon2dggrid(
        dggrid_instance,
        dggs_type,
        resolution,
        lat_col=lat_col,
        lon_col=lon_col,
        address_type=address_type,
    )
    binned = aggregate_bin(tagged, dggrid_col, stats, numeric_col, category_col)
    accessor = binned.dggrid
    return accessor.dggrid2geo(
        dggrid_instance,
        dggs_type,
        resolution,
        dggrid_col=dggrid_col,
        address_type=address_type,
    )

latlon2dggrid(dggrid_instance, dggs_type, resolution, lat_col='lat', lon_col='lon', set_index=False, address_type='SEQNUM')

Adds dggrid id to (Geo)DataFrame.

pd.DataFrame: uses lat_col and lon_col (default lat and lon) gpd.GeoDataFrame: uses geometry

Assumes coordinates in epsg=4326.

Parameters

dggrid_instance : DGGRIDv7 DGGRID instance dggs_type : str dggrid type resolution : int dggrid resolution lat_col : str Name of the latitude column (if used), default 'lat' lon_col : str Name of the longitude column (if used), default 'lon' set_index : bool If True, the column with the dggrid ids is set as index, default False address_type : str Address type, default 'SEQNUM'

Returns


(Geo)DataFrame with dggrid ids added

Source code in vgridpandas/dggridpandas.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def latlon2dggrid(
    self,
    dggrid_instance,
    dggs_type: str,
    resolution: int,
    lat_col: str = "lat",
    lon_col: str = "lon",
    set_index: bool = False,
    address_type: str = "SEQNUM",
) -> AnyDataFrame:
    """Assign a DGGRID id to every row of the (Geo)DataFrame.

    gpd.GeoDataFrame: coordinates are read from ``geometry.x`` / ``geometry.y``
    any other frame: coordinates are read from `lon_col` and `lat_col`

    Coordinates are assumed to be in epsg=4326.

    Parameters
    ----------
    dggrid_instance : DGGRIDv7
        DGGRID instance, forwarded to ``latlon_to_dggrid``
    dggs_type : str
        dggrid type; its lower-cased form is used in the new column names
    resolution : int
        dggrid resolution
    lat_col : str
        Name of the latitude column (if used), default 'lat'
    lon_col : str
        Name of the longitude column (if used), default 'lon'
    set_index : bool
        If True, the column with the dggrid ids is set as index, default False
    address_type : str
        Address type, default 'SEQNUM'

    Returns
    -------
    (Geo)DataFrame
        New frame with the columns ``dggrid_<dggs_type lower-cased>`` (ids)
        and ``dggrid_<dggs_type lower-cased>_res`` (the resolution) added.
    """
    frame = self._df
    if isinstance(frame, gpd.GeoDataFrame):
        points = frame.geometry
        lons, lats = points.x, points.y
    else:
        lons, lats = frame[lon_col], frame[lat_col]

    def encode(lat, lon):
        # One id lookup per coordinate pair.
        return latlon_to_dggrid(
            dggrid_instance, dggs_type, lat, lon, resolution, address_type
        )

    cell_ids = list(map(encode, lats, lons))

    dggrid_col = f"dggrid_{dggs_type.lower()}"
    tagged = frame.assign(
        **{dggrid_col: cell_ids, f"{dggrid_col}_res": resolution}
    )
    return tagged.set_index(dggrid_col) if set_index else tagged