Skip to content

H3Pandas module

H3Pandas

Source code in vgridpandas/h3pandas.py
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
@pd.api.extensions.register_dataframe_accessor("h3")
class H3Pandas:
    """Pandas accessor exposing H3 grid helpers as ``df.h3.<method>``.

    The accessor stores a reference to the wrapped (Geo)DataFrame.
    ``latlon2h3``, ``polyfill`` and ``linetrace`` build their result with
    ``assign``/``join``/``set_index`` on that frame.
    """

    def __init__(self, df: DataFrame):
        self._df = df

    def latlon2h3(
        self,
        resolution: int,
        lat_col: str = "lat",
        lon_col: str = "lon",
        set_index: bool = False,
    ) -> AnyDataFrame:
        """Adds H3 index to (Geo)DataFrame.

        pd.DataFrame: uses `lat_col` and `lon_col` (default `lat` and `lon`)
        gpd.GeoDataFrame: uses `geometry`

        Assumes coordinates in epsg=4326.

        Parameters
        ----------
        resolution : int
            H3 resolution (passed through ``validate_h3_resolution`` first)
        lat_col : str
            Name of the latitude column (if used), default 'lat'
        lon_col : str
            Name of the longitude column (if used), default 'lon'
        set_index : bool
            If True, the column with H3 ID is set as index, default False

        Returns
        -------
        (Geo)DataFrame with H3 ID added

        Notes
        -----
        Besides the H3 ID column (named by ``H3_COL``), a companion column
        named ``f"{H3_COL}_res"`` holding the resolution is added as well.
        It is omitted from the example outputs below.

        See Also
        --------
        h3bin : Bins points into H3 cells and computes statistics

        Examples
        --------
        >>> df = pd.DataFrame({'lat': [50, 51], 'lon':[14, 15]})
        >>> df.h3.latlon2h3(8, set_index=True)
                         lat  lon
        h3
        881e309739fffff   50   14
        881e2659c3fffff   51   15

        >>> df.h3.latlon2h3(8, set_index=False)
           lat  lon            h3
        0   50   14  881e309739fffff
        1   51   15  881e2659c3fffff

        >>> gdf = gpd.GeoDataFrame({'val': [5, 1]},
        ... geometry=gpd.points_from_xy(x=[14, 15], y=(50, 51)))
        >>> gdf.h3.latlon2h3(8, set_index=True)
                         val                   geometry
        h3
        881e309739fffff    5  POINT (14.00000 50.00000)
        881e2659c3fffff    1  POINT (15.00000 51.00000)

        """
        resolution = validate_h3_resolution(resolution)
        if isinstance(self._df, gpd.GeoDataFrame):
            # GeoDataFrame: x is longitude, y is latitude
            lons = self._df.geometry.x
            lats = self._df.geometry.y
        else:
            lons = self._df[lon_col]
            lats = self._df[lat_col]

        h3_ids = [latlon_to_h3(lat, lon, resolution) for lat, lon in zip(lats, lons)]

        h3_col = H3_COL
        assign_arg = {h3_col: h3_ids, f"{h3_col}_res": resolution}
        df = self._df.assign(**assign_arg)
        if set_index:
            return df.set_index(h3_col)
        return df

    def h32geo(
        self, h3_col: Optional[str] = None, fix_antimeridian: Optional[str] = None
    ) -> GeoDataFrame:
        """Add geometry with H3 geometry to the DataFrame.

        Parameters
        ----------
        h3_col : str, optional
            Column holding the H3 IDs. When None, the column named by
            ``H3_COL`` is used.
        fix_antimeridian : str, optional
            Forwarded unchanged to ``dggs_ids_to_geodataframe``.

        Returns
        -------
        Whatever ``dggs_ids_to_geodataframe`` returns for this frame, the ID
        column and ``h3_to_geo`` (annotated as a GeoDataFrame).

        Raises
        ------
        ValueError
            If the selected ID column is not among the frame's columns.
        """
        column = H3_COL if h3_col is None else h3_col
        if column not in self._df.columns:
            raise ValueError(f"Column '{column}' not found in DataFrame")
        return dggs_ids_to_geodataframe(
            self._df, self._df[column], h3_to_geo, fix_antimeridian=fix_antimeridian
        )

    def h3bin(
        self,
        resolution: int,
        stats: str = "count",
        numeric_col: str = None,
        category_col: str = None,
        lat_col: str = "lat",
        lon_col: str = "lon",
        fix_antimeridian: Optional[str] = None,
    ) -> GeoDataFrame:
        """
        Bin points into h3 cells and compute statistics.

        The frame is first indexed with ``latlon2h3``, then handed to
        ``aggregate_bin`` together with ``stats``, ``numeric_col`` and
        ``category_col``; the aggregated frame is finally converted with
        ``h32geo`` (which receives ``fix_antimeridian``).
        """
        h3_col = H3_COL
        df = self.latlon2h3(resolution, lat_col, lon_col)
        result = aggregate_bin(df, h3_col, stats, numeric_col, category_col)
        return result.h3.h32geo(h3_col=h3_col, fix_antimeridian=fix_antimeridian)

    def polyfill(
        self,
        resolution: int,
        predicate: str = None,
        compact: bool = False,
        explode: bool = False,
        fix_antimeridian: Optional[str] = None,
    ) -> AnyDataFrame:
        """Compute the H3 cells of every row geometry via ``polyfill_row``.

        Parameters
        ----------
        resolution : int
            H3 resolution
        predicate : str, optional
            Spatial predicate to apply ('intersect', 'within', 'centroid_within', 'largest_overlap')
        compact : bool, optional
            Enable H3 compact mode
        explode : bool
            If True, will explode the resulting list vertically.
            All other columns' values are copied.
            Default: False
        fix_antimeridian : str, optional
            Antimeridian fix: 'shift', 'shift_balanced', 'shift_west', 'shift_east', or 'split'

        Returns
        -------
        (Geo)DataFrame with the cells stored in the column named by
        ``H3_COL``: one list per row, or one cell per row when ``explode``
        is True.
        """

        result = self._df.geometry.apply(
            lambda geom: polyfill_row(
                geom, resolution, predicate, compact, fix_antimeridian
            )
        )

        if not explode:
            return self._df.assign(**{H3_COL: result})

        # Exploding repeats the original index, so the join copies the other
        # columns onto every exploded cell.
        result = result.explode().to_frame(H3_COL)
        return self._df.join(result)

    def linetrace(self, resolution: int, explode: bool = False) -> AnyDataFrame:
        """An H3 cell representation of a (Multi)LineString traced along its vertices.

        Parameters
        ----------
        resolution : int
            H3 resolution
        explode : bool
            If True, will explode the resulting list vertically.
            All other columns' values are copied.
            Default: False

        Returns
        -------
        (Geo)DataFrame with the traced H3 cells of each input line stored in
        the column named by ``H3_COL``: one list per row, or one cell per row
        when ``explode`` is True.

        Notes
        -----
        NOTE(review): the example output below labels the cell column
        ``h3_linetrace`` while the code writes to ``H3_COL`` - confirm which
        name is current.

        Examples
        --------
        >>> from shapely.geometry import LineString
        >>> gdf = gpd.GeoDataFrame(geometry=[LineString([[0, 0], [1, 0], [1, 1]])])
        >>> gdf.h3.linetrace(4)
                                                    geometry                                       h3_linetrace
        0  LINESTRING (0.00000 0.00000, 1.00000 0.00000, ...  [83754efffffffff, 83754cfffffffff, 837541fffff...  # noqa E501
        >>> gdf.h3.linetrace(4, explode=True)
                                                    geometry     h3_linetrace
        0  LINESTRING (0.00000 0.00000, 1.00000 0.00000, ...  83754efffffffff
        0  LINESTRING (0.00000 0.00000, 1.00000 0.00000, ...  83754cfffffffff
        0  LINESTRING (0.00000 0.00000, 1.00000 0.00000, ...  837541fffffffff

        """

        # Map over the geometry column (as polyfill does) rather than using a
        # row-wise DataFrame.apply: the row-wise form hands back a DataFrame
        # instead of a Series when the frame has no rows.
        result = self._df.geometry.apply(
            lambda geom: list(linetrace(geom, resolution))
        )
        if not explode:
            return self._df.assign(**{H3_COL: result})

        result = result.explode().to_frame(H3_COL)
        return self._df.join(result)

h32geo(h3_col=None, fix_antimeridian=None)

Add geometry with H3 geometry to the DataFrame.

Source code in vgridpandas/h3pandas.py
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
def h32geo(
    self, h3_col: str = None, fix_antimeridian: Optional[str] = None
) -> GeoDataFrame:
    """Add geometry with H3 geometry to the DataFrame.

    Parameters
    ----------
    h3_col : str, optional
        Column holding the H3 IDs. When None, the column named by
        ``H3_COL`` is used.
    fix_antimeridian : str, optional
        Forwarded unchanged to ``dggs_ids_to_geodataframe``.

    Returns
    -------
    Whatever ``dggs_ids_to_geodataframe`` returns for this frame, the ID
    column and ``h3_to_geo`` (annotated as a GeoDataFrame).

    Raises
    ------
    ValueError
        If the selected ID column is not among the frame's columns.
    """
    # Resolve the column once; the default is only looked up when needed.
    column = H3_COL if h3_col is None else h3_col
    frame = self._df
    if column not in frame.columns:
        raise ValueError(f"Column '{column}' not found in DataFrame")
    return dggs_ids_to_geodataframe(
        frame, frame[column], h3_to_geo, fix_antimeridian=fix_antimeridian
    )

h3bin(resolution, stats='count', numeric_col=None, category_col=None, lat_col='lat', lon_col='lon', fix_antimeridian=None)

Bin points into h3 cells and compute statistics.

Source code in vgridpandas/h3pandas.py
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
def h3bin(
    self,
    resolution: int,
    stats: str = "count",
    numeric_col: str = None,
    category_col: str = None,
    lat_col: str = "lat",
    lon_col: str = "lon",
    fix_antimeridian: Optional[str] = None,
) -> GeoDataFrame:
    """
    Bin points into h3 cells and compute statistics.

    Steps: index the frame with ``latlon2h3``, aggregate it with
    ``aggregate_bin`` (receiving ``stats``, ``numeric_col`` and
    ``category_col``), then convert the aggregate with ``h32geo``
    (receiving ``fix_antimeridian``).
    """
    indexed = self.latlon2h3(resolution, lat_col, lon_col)
    binned = aggregate_bin(indexed, H3_COL, stats, numeric_col, category_col)
    # The aggregate is a new frame, so go through its own ``h3`` accessor.
    accessor = binned.h3
    return accessor.h32geo(h3_col=H3_COL, fix_antimeridian=fix_antimeridian)

latlon2h3(resolution, lat_col='lat', lon_col='lon', set_index=False)

Adds H3 index to (Geo)DataFrame.

For a pd.DataFrame, coordinates are read from lat_col and lon_col (default lat and lon). For a gpd.GeoDataFrame, they are read from geometry.

Assumes coordinates in epsg=4326.

Parameters

resolution : int
    H3 resolution
lat_col : str
    Name of the latitude column (if used), default 'lat'
lon_col : str
    Name of the longitude column (if used), default 'lon'
set_index : bool
    If True, the column with H3 ID is set as index, default False

Returns

(Geo)DataFrame with H3 ID added

See Also

geo2h3_aggregate : Extended API method that aggregates points by H3 id

Examples

>>> df = pd.DataFrame({'lat': [50, 51], 'lon':[14, 15]})
>>> df.h3.latlon2h3(8, set_index=True)
                 lat  lon
h3
881e309739fffff   50   14
881e2659c3fffff   51   15

>>> df.h3.latlon2h3(8, set_index=False)
   lat  lon            h3
0   50   14  881e309739fffff
1   51   15  881e2659c3fffff

>>> gdf = gpd.GeoDataFrame({'val': [5, 1]},
... geometry=gpd.points_from_xy(x=[14, 15], y=(50, 51)))
>>> gdf.h3.latlon2h3(8, set_index=True)
                 val                   geometry
h3
881e309739fffff    5  POINT (14.00000 50.00000)
881e2659c3fffff    1  POINT (15.00000 51.00000)

Source code in vgridpandas/h3pandas.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
def latlon2h3(
    self,
    resolution: int,
    lat_col: str = "lat",
    lon_col: str = "lon",
    set_index: bool = False,
) -> AnyDataFrame:
    """Adds H3 index to (Geo)DataFrame.

    pd.DataFrame: uses `lat_col` and `lon_col` (default `lat` and `lon`)
    gpd.GeoDataFrame: uses `geometry`

    Assumes coordinates in epsg=4326.

    Parameters
    ----------
    resolution : int
        H3 resolution (passed through ``validate_h3_resolution`` first)
    lat_col : str
        Name of the latitude column (if used), default 'lat'
    lon_col : str
        Name of the longitude column (if used), default 'lon'
    set_index : bool
        If True, the column with H3 ID is set as index, default False

    Returns
    -------
    (Geo)DataFrame with H3 ID added

    Notes
    -----
    Besides the H3 ID column (named by ``H3_COL``), a companion column
    named ``f"{H3_COL}_res"`` holding the resolution is added as well.
    It is omitted from the example outputs below.

    Examples
    --------
    >>> df = pd.DataFrame({'lat': [50, 51], 'lon':[14, 15]})
    >>> df.h3.latlon2h3(8, set_index=True)
                     lat  lon
    h3
    881e309739fffff   50   14
    881e2659c3fffff   51   15

    >>> df.h3.latlon2h3(8, set_index=False)
       lat  lon            h3
    0   50   14  881e309739fffff
    1   51   15  881e2659c3fffff

    >>> gdf = gpd.GeoDataFrame({'val': [5, 1]},
    ... geometry=gpd.points_from_xy(x=[14, 15], y=(50, 51)))
    >>> gdf.h3.latlon2h3(8, set_index=True)
                     val                   geometry
    h3
    881e309739fffff    5  POINT (14.00000 50.00000)
    881e2659c3fffff    1  POINT (15.00000 51.00000)

    """
    resolution = validate_h3_resolution(resolution)
    frame = self._df
    if isinstance(frame, gpd.GeoDataFrame):
        # GeoDataFrame: x is longitude, y is latitude
        points = frame.geometry
        lons, lats = points.x, points.y
    else:
        lons, lats = frame[lon_col], frame[lat_col]

    cell_ids = [
        latlon_to_h3(lat, lon, resolution) for lat, lon in zip(lats, lons)
    ]

    with_cells = frame.assign(
        **{H3_COL: cell_ids, f"{H3_COL}_res": resolution}
    )
    return with_cells.set_index(H3_COL) if set_index else with_cells

linetrace(resolution, explode=False)

An H3 cell representation of a (Multi)LineString traced along its vertices.

Parameters

resolution : int
    H3 resolution
explode : bool
    If True, will explode the resulting list vertically. All other columns' values are copied. Default: False

Returns

(Geo)DataFrame with an added column holding the H3 cells traced along each input line (one list per row, or one cell per row when explode is True).

Examples

>>> from shapely.geometry import LineString
>>> gdf = gpd.GeoDataFrame(geometry=[LineString([[0, 0], [1, 0], [1, 1]])])
>>> gdf.h3.linetrace(4)
                                            geometry                                       h3_linetrace
0  LINESTRING (0.00000 0.00000, 1.00000 0.00000, ...  [83754efffffffff, 83754cfffffffff, 837541fffff...  # noqa E501
>>> gdf.h3.linetrace(4, explode=True)
                                            geometry     h3_linetrace
0  LINESTRING (0.00000 0.00000, 1.00000 0.00000, ...  83754efffffffff
0  LINESTRING (0.00000 0.00000, 1.00000 0.00000, ...  83754cfffffffff
0  LINESTRING (0.00000 0.00000, 1.00000 0.00000, ...  837541fffffffff

Source code in vgridpandas/h3pandas.py
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
def linetrace(self, resolution: int, explode: bool = False) -> AnyDataFrame:
    """An H3 cell representation of a (Multi)LineString traced along its vertices.

    Parameters
    ----------
    resolution : int
        H3 resolution
    explode : bool
        If True, will explode the resulting list vertically.
        All other columns' values are copied.
        Default: False

    Returns
    -------
    (Geo)DataFrame with the traced H3 cells of each input line stored in
    the column named by ``H3_COL``: one list per row, or one cell per row
    when ``explode`` is True.

    Notes
    -----
    NOTE(review): the example output below labels the cell column
    ``h3_linetrace`` while the code writes to ``H3_COL`` - confirm which
    name is current.

    Examples
    --------
    >>> from shapely.geometry import LineString
    >>> gdf = gpd.GeoDataFrame(geometry=[LineString([[0, 0], [1, 0], [1, 1]])])
    >>> gdf.h3.linetrace(4)
                                                geometry                                       h3_linetrace
    0  LINESTRING (0.00000 0.00000, 1.00000 0.00000, ...  [83754efffffffff, 83754cfffffffff, 837541fffff...  # noqa E501
    >>> gdf.h3.linetrace(4, explode=True)
                                                geometry     h3_linetrace
    0  LINESTRING (0.00000 0.00000, 1.00000 0.00000, ...  83754efffffffff
    0  LINESTRING (0.00000 0.00000, 1.00000 0.00000, ...  83754cfffffffff
    0  LINESTRING (0.00000 0.00000, 1.00000 0.00000, ...  837541fffffffff

    """

    # Map over the geometry column rather than using a row-wise
    # DataFrame.apply: the row-wise form hands back a DataFrame instead of a
    # Series when the frame has no rows.
    result = self._df.geometry.apply(
        lambda geom: list(linetrace(geom, resolution))
    )
    if not explode:
        return self._df.assign(**{H3_COL: result})

    # Exploding repeats the original index, so the join copies the other
    # columns onto every exploded cell.
    result = result.explode().to_frame(H3_COL)
    return self._df.join(result)

polyfill(resolution, predicate=None, compact=False, explode=False, fix_antimeridian=None)

Parameters

resolution : int
    H3 resolution
predicate : str, optional
    Spatial predicate to apply ('intersect', 'within', 'centroid_within', 'largest_overlap')
compact : bool, optional
    Enable H3 compact mode
explode : bool
    If True, will explode the resulting list vertically. All other columns' values are copied. Default: False
fix_antimeridian : str, optional
    Antimeridian fix: 'shift', 'shift_balanced', 'shift_west', 'shift_east', or 'split'

Source code in vgridpandas/h3pandas.py
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
def polyfill(
    self,
    resolution: int,
    predicate: str = None,
    compact: bool = False,
    explode: bool = False,
    fix_antimeridian: Optional[str] = None,
) -> AnyDataFrame:
    """Compute the H3 cells of every row geometry via ``polyfill_row``.

    Parameters
    ----------
    resolution : int
        H3 resolution
    predicate : str, optional
        Spatial predicate to apply ('intersect', 'within', 'centroid_within', 'largest_overlap')
    compact : bool, optional
        Enable H3 compact mode
    explode : bool
        If True, will explode the resulting list vertically.
        All other columns' values are copied.
        Default: False
    fix_antimeridian : str, optional
        Antimeridian fix: 'shift', 'shift_balanced', 'shift_west', 'shift_east', or 'split'

    Returns
    -------
    (Geo)DataFrame with the cells stored in the column named by ``H3_COL``:
    one list per row, or one cell per row when ``explode`` is True.
    """

    def cells_for(geom):
        return polyfill_row(geom, resolution, predicate, compact, fix_antimeridian)

    cells = self._df.geometry.apply(cells_for)

    if explode:
        # Exploding repeats the original index, so the join copies the other
        # columns onto every exploded cell.
        exploded = cells.explode().to_frame(H3_COL)
        return self._df.join(exploded)

    return self._df.assign(**{H3_COL: cells})

linetrace(geometry, resolution)

h3.polyfill equivalent for shapely (Multi)LineString.

Cells may repeat at self-intersections or shared vertices.

Parameters

geometry : LineString or MultiLineString
    Line to trace with H3 cells
resolution : int
    H3 resolution of the tracing cells

Returns

Iterator of H3 IDs (not a de-duplicated set: cells may repeat, as noted above)

Raises

TypeError if geometry is not a LineString or a MultiLineString

Source code in vgridpandas/h3pandas.py
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def linetrace(geometry: MultiLineOrLine, resolution: int) -> Iterator[str]:
    """h3.polyfill equivalent for shapely (Multi)LineString.

    Each consecutive pair of vertices is converted to its two H3 cells and
    the grid path between them is yielded, endpoints included. Cells may
    therefore repeat at shared vertices or self-intersections.

    Parameters
    ----------
    geometry : LineString or MultiLineString
        Line to trace with H3 cells
    resolution : int
        H3 resolution of the tracing cells

    Returns
    -------
    Iterator of H3 IDs (lazy, not de-duplicated)

    Raises
    ------
    TypeError if geometry is not a LineString or a MultiLineString
    """
    if isinstance(geometry, MultiLineString):
        # Trace every component line in turn
        for part in geometry.geoms:
            yield from linetrace(part, resolution)
        return

    if not isinstance(geometry, LineString):
        raise TypeError(f"Unknown type {type(geometry)}")

    vertices = geometry.coords
    for start, end in zip(vertices, vertices[1:]):
        # Coordinates are stored (x, y); reversing gives (lat, lng)
        first_cell = h3.latlng_to_cell(*start[::-1], resolution)
        last_cell = h3.latlng_to_cell(*end[::-1], resolution)
        yield from h3.grid_path_cells(first_cell, last_cell)  # inclusive of both ends

poly2h3(geometry, resolution, predicate=None, compact=False, fix_antimeridian=None)

Convert polygon geometries (Polygon, MultiPolygon) to H3 grid cells.

Parameters:

Name Type Description Default
resolution int

H3 resolution level [0..15]

required
geometry Polygon or MultiPolygon

Polygon geometry to convert

required
predicate str

Spatial predicate to apply ('intersect', 'within', 'centroid_within', 'largest_overlap')

None
compact bool

Enable H3 compact mode

False
fix_antimeridian str

'shift', 'shift_balanced', 'shift_west', 'shift_east', or 'split'

None
Example

>>> from shapely.geometry import Polygon
>>> poly = Polygon([(-122.5, 37.7), (-122.3, 37.7), (-122.3, 37.9), (-122.5, 37.9)])
>>> cells = poly2h3(poly, 10, predicate="intersect", compact=True)
>>> len(cells) > 0
True

Source code in vgridpandas/h3pandas.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def poly2h3(geometry, resolution, predicate=None, compact=False, fix_antimeridian=None):
    """
    Convert polygon geometries (Polygon, MultiPolygon) to H3 grid cells.

    Candidate cells are generated from the bounding box of each part and kept
    only when ``check_predicate`` accepts the cell polygon against that part.
    When ``compact`` is set, the candidates are compacted before this filter
    runs. LineString and MultiLineString inputs take the same path; any other
    geometry type yields an empty list.

    Args:
        geometry (shapely.geometry.Polygon or shapely.geometry.MultiPolygon): Polygon geometry to convert
        resolution (int): H3 resolution level [0..15]
        predicate (str, optional): Spatial predicate to apply ('intersect', 'within', 'centroid_within', 'largest_overlap')
        compact (bool): Enable H3 compact mode
        fix_antimeridian (str, optional): 'shift', 'shift_balanced', 'shift_west', 'shift_east', or 'split'
    Returns:
        list: H3 IDs of the candidate cells accepted by the predicate check

    Example:
        >>> from shapely.geometry import Polygon
        >>> poly = Polygon([(-122.5, 37.7), (-122.3, 37.7), (-122.3, 37.9), (-122.5, 37.9)])
        >>> cells = poly2h3(poly, 10, predicate="intersect", compact=True)
        >>> len(cells) > 0
        True
    """
    if isinstance(geometry, (Polygon, LineString)):
        parts = [geometry]
    elif isinstance(geometry, (MultiPolygon, MultiLineString)):
        parts = list(geometry.geoms)
    else:
        return []

    accepted = []
    for part in parts:
        # Candidates come from the part's bounding box, not the part itself
        candidates = h3.geo_to_cells(box(*part.bounds), resolution)
        if compact:
            candidates = h3.compact_cells(candidates)

        accepted.extend(
            cell
            for cell in candidates
            if check_predicate(
                h3_to_geo(cell, fix_antimeridian=fix_antimeridian), part, predicate
            )
        )

    return accepted

polyfill_row(geometry, resolution, predicate=None, compact=False, fix_antimeridian=None)

Return cell ids covering a single row geometry.

Source code in vgridpandas/h3pandas.py
105
106
107
108
109
110
111
112
113
114
115
116
117
def polyfill_row(
    geometry, resolution, predicate=None, compact=False, fix_antimeridian=None
) -> list:
    """Return the unique cell ids covering a single row geometry.

    Polygons and MultiPolygons are delegated to ``poly2h3``; LineStrings and
    MultiLineStrings to ``linetrace`` (which ignores ``predicate``,
    ``compact`` and ``fix_antimeridian``).

    Returns
    -------
    list
        Cell ids without duplicates, in the order they were first produced.

    Raises
    ------
    TypeError
        If the geometry is none of the four supported types.
    """
    if isinstance(geometry, (Polygon, MultiPolygon)):
        tokens = poly2h3(geometry, resolution, predicate, compact, fix_antimeridian)
    elif isinstance(geometry, (LineString, MultiLineString)):
        tokens = linetrace(geometry, resolution)
    else:
        raise TypeError(f"Unknown type {type(geometry)}")
    # De-duplicate through a dict instead of a set: dict keys keep insertion
    # order, whereas the iteration order of a set of strings changes between
    # interpreter runs (hash randomisation), making the output unstable.
    return list(dict.fromkeys(tokens))