From 56d59ccf9cbe080c5167f9f51930d6a6863f527d Mon Sep 17 00:00:00 2001 From: Moritz Schott Date: Thu, 16 Oct 2025 13:41:21 +0200 Subject: [PATCH 1/2] feat: add output goemetry filter --- CHANGELOG.md | 1 + ohsome/response.py | 17 +++++++-- ohsome/test/data/mixed_geometries.geojson | 46 +++++++++++++++++++++++ ohsome/test/test_client.py | 23 ++++++++++++ 4 files changed, 84 insertions(+), 3 deletions(-) create mode 100644 ohsome/test/data/mixed_geometries.geojson diff --git a/CHANGELOG.md b/CHANGELOG.md index 1358628..e24f91b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ ### Added +- functionality to filter the output geometry types ([#178](https://github.com/GIScience/ohsome-py/issues/178)) - support for python 3.13 ## [0.4.0](https://github.com/GIScience/ohsome-py/releases/tag/v0.4.0) diff --git a/ohsome/response.py b/ohsome/response.py index c7b2cb0..6348438 100644 --- a/ohsome/response.py +++ b/ohsome/response.py @@ -22,7 +22,10 @@ def __init__(self, data: dict, url: str = None): self.url = url def as_dataframe( - self, multi_index: Optional[bool] = True, explode_tags: Optional[tuple] = () + self, + multi_index: Optional[bool] = True, + explode_tags: Optional[tuple] = (), + geometry_filter: Optional[list[str]] = None, ) -> Union[pd.DataFrame, gpd.GeoDataFrame]: """ Converts the ohsome response to a pandas.DataFrame or a geopandas.GeoDataFrame if the @@ -31,12 +34,14 @@ def as_dataframe( :param explode_tags: By default, tags of extracted features are stored in a single dict-column. You can specify a tuple of tags that should be popped from this column. To disable it completely, pass None. Yet, be aware that you may get a large but sparse data frame. 
+ :param geometry_filter: Limit the output to the given geometry types (e.g. `["LineString"]`) to work around + https://github.com/GIScience/ohsome-api/issues/339 :return: pandas.DataFrame or geopandas.GeoDataFrame """ if "features" not in self.data.keys(): return self._as_dataframe(multi_index) else: - return self._as_geodataframe(multi_index, explode_tags) + return self._as_geodataframe(multi_index, explode_tags, geometry_filter) def _as_dataframe(self, multi_index=True) -> pd.DataFrame: groupby_names = [] @@ -70,7 +75,10 @@ def _as_dataframe(self, multi_index=True) -> pd.DataFrame: return result_df.sort_index() def _as_geodataframe( - self, multi_index: Optional[bool] = True, explode_tags: Optional[tuple] = () + self, + multi_index: Optional[bool] = True, + explode_tags: Optional[tuple] = (), + geom_filter: Optional[list[str]] = None, ) -> gpd.GeoDataFrame: if len(self.data["features"]) == 0: return gpd.GeoDataFrame( @@ -108,6 +116,9 @@ def _as_geodataframe( "This result type cannot be converted to a GeoPandas GeoDataFrame object." 
) + if geom_filter is not None: + features = features[features.geom_type.isin(geom_filter)] + time_columns = ["@validFrom", "@validTo", "@snapshotTimestamp", "@timestamp"] existing_time_columns = features.columns.intersection(time_columns) features[existing_time_columns] = features[existing_time_columns].apply( diff --git a/ohsome/test/data/mixed_geometries.geojson b/ohsome/test/data/mixed_geometries.geojson new file mode 100644 index 0000000..64d4ccc --- /dev/null +++ b/ohsome/test/data/mixed_geometries.geojson @@ -0,0 +1,46 @@ +{ + "attribution": { + "url": "https://ohsome.org/copyrights", + "text": "\u00a9 OpenStreetMap contributors" + }, + "apiVersion": "1.10.4", + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "geometry": { + "coordinates": [ + [ + 12.30001864618336, + 48.220125234580934 + ], + [ + 12.30022996627983, + 48.22026810127991 + ] + ], + "type": "LineString" + }, + "properties": { + "@osmId": "way/26936052", + "@snapshotTimestamp": "2016-01-01T00:00:00Z", + "c": "d" + } + }, + { + "type": "Feature", + "geometry": { + "coordinates": [ + 12.3003682566372, + 48.22021116170123 + ], + "type": "Point" + }, + "properties": { + "@osmId": "way/26936051", + "@snapshotTimestamp": "2016-01-01T00:00:00Z", + "a": "b" + } + } + ] +} diff --git a/ohsome/test/test_client.py b/ohsome/test/test_client.py index 25488a3..f9f6573 100644 --- a/ohsome/test/test_client.py +++ b/ohsome/test/test_client.py @@ -3,12 +3,16 @@ """Tests for ohsome client""" import datetime as dt +import json import logging import os +from copy import deepcopy +from pathlib import Path import geopandas as gpd import pandas as pd import pytest +import responses import ohsome from ohsome import OhsomeClient @@ -317,3 +321,22 @@ def test_none_init(): user_agent=None, retry=None, ) + + +def test_accepted_geom_type(base_client): + """Test geom-type filtering in case https://github.com/GIScience/ohsome-api/issues/339 is experienced.""" + client = deepcopy(base_client) + with 
responses.RequestsMock() as rsps: + data = Path(f"{script_path}/data/mixed_geometries.geojson") + rsps.post( + "https://api.ohsome.org/v1/elements/geometry", + json=json.loads(data.read_text()), + ) + + bboxes = "8.7137,49.4096,8.717,49.4119" + time = "2015-01-01" + flt = "geometry:line" + + response = client.elements.geometry.post(bboxes=bboxes, time=time, filter=flt) + result = response.as_dataframe(geometry_filter=["LineString"]) + assert result.geometry.type.to_list() == ["LineString"] From 1a6acfe9d5f2eda02b5a3ba9d64a4b0be61e9261 Mon Sep 17 00:00:00 2001 From: Moritz Schott Date: Thu, 16 Oct 2025 13:41:32 +0200 Subject: [PATCH 2/2] refactor: reformat changelog --- CHANGELOG.md | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e24f91b..6bafbfc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,55 +44,60 @@ ### Changed -- if tags are supplied for explosion in `response.as_dataframe`, the respective column will always be present in the resulting Geodataframe, even if the tags were not part of the result. In that case the column will be all-None ([#149](https://github.com/GIScience/ohsome-py/issues/149)). - +- if tags are supplied for explosion in `response.as_dataframe`, the respective column will always be present in the + resulting Geodataframe, even if the tags were not part of the result. In that case the column will be + all-None ([#149](https://github.com/GIScience/ohsome-py/issues/149)). 
## [0.3.1](https://github.com/GIScience/ohsome-py/releases/tag/v0.3.1) ### Fixed - - prevent an exception if the `log_dir` for the `OhsomeClient` was set to `None` - - removed time-dependency of unit tests that would cause them to fail at any time after the cassettes were recorded +- prevent an exception if the `log_dir` for the `OhsomeClient` was set to `None` +- removed time-dependency of unit tests that would cause them to fail at any time after the cassettes were recorded ### Changed - - relaxed dependency requirement for `urllib3` to >=2.0.2 to prevent ohsome-py from becoming a 'diamond-dependency' - - improved and sped up testing (first steps towards [#139](https://github.com/GIScience/ohsome-py/issues/139)) - - move metadata property from singleton to `cached_property` +- relaxed dependency requirement for `urllib3` to >=2.0.2 to prevent ohsome-py from becoming a 'diamond-dependency' +- improved and sped up testing (first steps towards [#139](https://github.com/GIScience/ohsome-py/issues/139)) +- move metadata property from singleton to `cached_property` ## [0.3.0](https://github.com/GIScience/ohsome-py/releases/tag/v0.3.0) ### Added - - support for python 3.12 - - custom [retry](https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#urllib3.util.Retry) configuration - - start and end timestamp meta information of the client are now datetime objects - - accept shapely Polygon and MultiPolygon for `bpolys` input parameter - - if a request fails a bash script containing the respective `curl` command is logged (if possible). This allows for easier debugging and sharing of failed requests. - - timestamps are converted without timezone information. 
Deviates from Ohsome API [(Issue #318)](https://github.com/GIScience/ohsome-api/issues/318) +- support for python 3.12 +- custom [retry](https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#urllib3.util.Retry) configuration +- start and end timestamp meta information of the client are now datetime objects +- accept shapely Polygon and MultiPolygon for `bpolys` input parameter +- if a request fails a bash script containing the respective `curl` command is logged (if possible). This allows for + easier debugging and sharing of failed requests. +- timestamps are converted without timezone information. Deviates from Ohsome + API [(Issue #318)](https://github.com/GIScience/ohsome-api/issues/318) ### Changed - - breaking: geodataframes now contain a `@other_tags` colum containing all OSM tags. This behaviour can be adapted using the `explode_tags` parameter that allows to specify tags that should be in a separate column or to disable the feature completely. The latter will result in a potentially wide but sparse data frame. +- breaking: geodataframes now contain a `@other_tags` column containing all OSM tags. This behaviour can be adapted using + the `explode_tags` parameter that allows to specify tags that should be in a separate column or to disable the feature + completely. The latter will result in a potentially wide but sparse data frame. 
### Removed - - support for python < 3.10 - - support for geopandas < 0.14 - - support for pandas < 2.1 - - support for urllib3 < 2.1 +- support for python < 3.10 +- support for geopandas < 0.14 +- support for pandas < 2.1 +- support for urllib3 < 2.1 ## [0.2.0](https://github.com/GIScience/ohsome-py/releases/tag/v0.2.0) ### Added - - support for python 3.11 - - support for geopandas up to v0.12.0 +- support for python 3.11 +- support for geopandas up to v0.12.0 ### Removed - - support for python 3.7 +- support for python 3.7 ### Fixed - - wrong formatting of list parameters for ohsome requests if not given as string +- wrong formatting of list parameters for ohsome requests if not given as string