Skip to content

GeorefPandas module

GEOREFPandas

Source code in vgridpandas/georefpandas.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
@pd.api.extensions.register_dataframe_accessor("georef")
class GEOREFPandas:
    """DataFrame accessor, registered under the name ``georef``.

    The accessor keeps a reference to the wrapped (Geo)DataFrame and exposes
    helpers that tag rows with georef IDs, turn georef IDs into geometries,
    and bin rows by georef cell.
    """

    def __init__(self, df: DataFrame):
        # Frame the accessor was invoked on; never modified in place here.
        self._df = df

    # georef API
    # Thin wrappers around the Vgrid georef functions, applied row by row.

    def latlon2georef(
        self,
        resolution: int,
        lat_col: str = "lat",
        lon_col: str = "lon",
        set_index: bool = False,
    ) -> AnyDataFrame:
        """Attach a georef ID to every row of the (Geo)DataFrame.

        pd.DataFrame: coordinates come from `lat_col` and `lon_col`
        gpd.GeoDataFrame: coordinates come from the `x`/`y` of `geometry`

        Assumes coordinates in epsg=4326.

        Parameters
        ----------
        resolution : int
            georef resolution; passed to ``latlon_to_georef`` for each row and
            also stored in the ``<GEOREF_COL>_res`` column
        lat_col : str
            Name of the latitude column (if used), default 'lat'
        lon_col : str
            Name of the longitude column (if used), default 'lon'
        set_index : bool
            If True, the column holding the georef IDs is set as the index,
            default False

        Returns
        -------
        (Geo)DataFrame
            New frame with the georef ID and resolution columns added
        """
        frame = self._df
        if isinstance(frame, gpd.GeoDataFrame):
            points = frame.geometry
            lons, lats = points.x, points.y
        else:
            lons, lats = frame[lon_col], frame[lat_col]

        cell_ids = [
            latlon_to_georef(lat, lon, resolution) for lat, lon in zip(lats, lons)
        ]

        id_col = GEOREF_COL
        tagged = frame.assign(**{id_col: cell_ids, f"{id_col}_res": resolution})
        return tagged.set_index(id_col) if set_index else tagged

    def georef2geo(self, georef_col: str = None) -> GeoDataFrame:
        """Build a GeoDataFrame from the georef IDs stored in a column.

        Parameters
        ----------
        georef_col : str, optional
            Column holding the georef IDs; ``GEOREF_COL`` is used when None

        Returns
        -------
        GeoDataFrame
            Result of ``dggs_ids_to_geodataframe`` called with the wrapped
            frame, the ID column and ``georef_to_geo``

        Raises
        ------
        ValueError
            If the selected column is not present in the DataFrame
        """
        id_col = GEOREF_COL if georef_col is None else georef_col
        if id_col not in self._df.columns:
            raise ValueError(f"Column '{id_col}' not found in DataFrame")
        return dggs_ids_to_geodataframe(self._df, self._df[id_col], georef_to_geo)

    def georefbin(
        self,
        resolution: int,
        stats: str = "count",
        numeric_col: str = None,
        category_col: str = None,
        lat_col: str = "lat",
        lon_col: str = "lon",
    ) -> GeoDataFrame:
        """Bin points into georef cells and compute statistics.

        Rows are first tagged via ``latlon2georef``; the tagged frame is then
        handed to ``aggregate_bin`` and the aggregated result is converted to
        geometries with ``georef2geo``.

        Parameters
        ----------
        resolution : int
            georef resolution used for tagging the rows
        stats : str
            Forwarded to ``aggregate_bin``, default 'count'
        numeric_col : str, optional
            Forwarded to ``aggregate_bin``
        category_col : str, optional
            Forwarded to ``aggregate_bin``
        lat_col : str
            Name of the latitude column (if used), default 'lat'
        lon_col : str
            Name of the longitude column (if used), default 'lon'

        Returns
        -------
        GeoDataFrame
            Aggregated result with georef cell geometries
        """
        binned = aggregate_bin(
            self.latlon2georef(resolution, lat_col, lon_col),
            GEOREF_COL,
            stats,
            numeric_col,
            category_col,
        )
        return binned.georef.georef2geo(georef_col=GEOREF_COL)

georef2geo(georef_col=None)

Add geometry with GEOREF geometry to the DataFrame.

Source code in vgridpandas/georefpandas.py
71
72
73
74
75
76
77
78
79
80
81
def georef2geo(self, georef_col: str = None) -> GeoDataFrame:
    """Build a GeoDataFrame from the georef IDs stored in a column.

    Parameters
    ----------
    georef_col : str, optional
        Column holding the georef IDs; ``GEOREF_COL`` is used when None

    Returns
    -------
    GeoDataFrame
        Result of ``dggs_ids_to_geodataframe`` called with the wrapped
        frame, the ID column and ``georef_to_geo``

    Raises
    ------
    ValueError
        If the selected column is not present in the DataFrame
    """
    id_col = GEOREF_COL if georef_col is None else georef_col
    if id_col not in self._df.columns:
        raise ValueError(f"Column '{id_col}' not found in DataFrame")
    return dggs_ids_to_geodataframe(self._df, self._df[id_col], georef_to_geo)

georefbin(resolution, stats='count', numeric_col=None, category_col=None, lat_col='lat', lon_col='lon')

Bin points into georef cells and compute statistics.

Source code in vgridpandas/georefpandas.py
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def georefbin(
    self,
    resolution: int,
    stats: str = "count",
    numeric_col: str = None,
    category_col: str = None,
    lat_col: str = "lat",
    lon_col: str = "lon",
) -> GeoDataFrame:
    """Bin points into georef cells and compute statistics.

    Rows are first tagged via ``latlon2georef``; the tagged frame is then
    handed to ``aggregate_bin`` and the aggregated result is converted to
    geometries with ``georef2geo``.

    Parameters
    ----------
    resolution : int
        georef resolution used for tagging the rows
    stats : str
        Forwarded to ``aggregate_bin``, default 'count'
    numeric_col : str, optional
        Forwarded to ``aggregate_bin``
    category_col : str, optional
        Forwarded to ``aggregate_bin``
    lat_col : str
        Name of the latitude column (if used), default 'lat'
    lon_col : str
        Name of the longitude column (if used), default 'lon'

    Returns
    -------
    GeoDataFrame
        Aggregated result with georef cell geometries
    """
    binned = aggregate_bin(
        self.latlon2georef(resolution, lat_col, lon_col),
        GEOREF_COL,
        stats,
        numeric_col,
        category_col,
    )
    return binned.georef.georef2geo(georef_col=GEOREF_COL)

latlon2georef(resolution, lat_col='lat', lon_col='lon', set_index=False)

Adds georef ID to (Geo)DataFrame.

For a pd.DataFrame, the coordinates are read from lat_col and lon_col (default lat and lon). For a gpd.GeoDataFrame, they are read from geometry.

Assumes coordinates in epsg=4326.

Parameters

- resolution : int — georef resolution
- lat_col : str — name of the latitude column (if used), default 'lat'
- lon_col : str — name of the longitude column (if used), default 'lon'
- set_index : bool — if True, the column with the georef IDs is set as the index, default False

Returns

(Geo)DataFrame with georef IDs added

Source code in vgridpandas/georefpandas.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def latlon2georef(
    self,
    resolution: int,
    lat_col: str = "lat",
    lon_col: str = "lon",
    set_index: bool = False,
) -> AnyDataFrame:
    """Attach a georef ID to every row of the (Geo)DataFrame.

    pd.DataFrame: coordinates come from `lat_col` and `lon_col`
    gpd.GeoDataFrame: coordinates come from the `x`/`y` of `geometry`

    Assumes coordinates in epsg=4326.

    Parameters
    ----------
    resolution : int
        georef resolution; passed to ``latlon_to_georef`` for each row and
        also stored in the ``<GEOREF_COL>_res`` column
    lat_col : str
        Name of the latitude column (if used), default 'lat'
    lon_col : str
        Name of the longitude column (if used), default 'lon'
    set_index : bool
        If True, the column holding the georef IDs is set as the index,
        default False

    Returns
    -------
    (Geo)DataFrame
        New frame with the georef ID and resolution columns added
    """
    frame = self._df
    if isinstance(frame, gpd.GeoDataFrame):
        points = frame.geometry
        lons, lats = points.x, points.y
    else:
        lons, lats = frame[lon_col], frame[lat_col]

    cell_ids = [
        latlon_to_georef(lat, lon, resolution) for lat, lon in zip(lats, lons)
    ]

    id_col = GEOREF_COL
    tagged = frame.assign(**{id_col: cell_ids, f"{id_col}_res": resolution})
    return tagged.set_index(id_col) if set_index else tagged